diff options
author | James Morris <jmorris@namei.org> | 2008-08-28 10:47:34 +1000 |
---|---|---|
committer | James Morris <jmorris@namei.org> | 2008-08-28 10:47:34 +1000 |
commit | 86d688984deefa3ae5a802880c11f2b408b5d6cf (patch) | |
tree | 7ea5e8189b0a774626d3ed7c3c87df2495a4c4a0 /fs | |
parent | 93c06cbbf9fea5d5be1778febb7fa9ab1a74e5f5 (diff) | |
parent | 4c246edd2550304df5b766cc841584b2bb058843 (diff) | |
download | blackbird-op-linux-86d688984deefa3ae5a802880c11f2b408b5d6cf.tar.gz blackbird-op-linux-86d688984deefa3ae5a802880c11f2b408b5d6cf.zip |
Merge branch 'master' into next
Diffstat (limited to 'fs')
172 files changed, 3499 insertions, 3196 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = v9fs_dir_readdir, .open = v9fs_file_open, .release = v9fs_dir_release, diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..abccb5dab9a8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH If unsure, say N. +config CIFS_UPCALL + bool "Kerberos/SPNEGO advanced session setup" + depends on CIFS && KEYS + help + Enables an upcall mechanism for CIFS which accesses + userspace helper utilities to provide SPNEGO packaged (RFC 4178) + Kerberos tickets which are needed to mount to certain secure servers + (for which more secure Kerberos authentication is required). If + unsure, say N. + config CIFS_XATTR bool "CIFS extended attributes" depends on CIFS @@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL (which is disabled by default). See the file fs/cifs/README for more details. If unsure, say N. -config CIFS_UPCALL - bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" - depends on CIFS_EXPERIMENTAL - depends on KEYS - help - Enables an upcall mechanism for CIFS which accesses - userspace helper utilities to provide SPNEGO packaged (RFC 4178) - Kerberos tickets which are needed to mount to certain secure servers - (for which more secure Kerberos authentication is required). If - unsure, say N. - config CIFS_DFS_UPCALL bool "DFS feature support (EXPERIMENTAL)" depends on CIFS_EXPERIMENTAL diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -197,6 +197,7 @@ out: const struct file_operations adfs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = adfs_readdir, .fsync = file_fsync, }; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); const struct file_operations affs_dir_operations = { .read = generic_read_dir, + .llseek = generic_file_llseek, .readdir = affs_readdir, .fsync = file_fsync, }; diff --git a/fs/afs/write.c b/fs/afs/write.c index 9a849ad3c489..065b4e10681a 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, page = pages[loop]; if (page->index > wb->last) break; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) break; if (!PageDirty(page) || page_private(page) != (unsigned long) wb) { diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { .release = dcache_dir_close, .read = generic_read_dir, .readdir = dcache_readdir, + .llseek = dcache_dir_lseek, .ioctl = autofs4_root_ioctl, }; @@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { .release = dcache_dir_close, .read = generic_read_dir, .readdir = dcache_readdir, + .llseek = dcache_dir_lseek, }; const struct inode_operations autofs4_indirect_root_inode_operations = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..740f53672a8a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, .readdir = befs_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations befs_dir_inode_operations = { diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 56372ecf1690..dfc0197905ca 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) /* Stash our initial stack pointer into the mm structure */ current->mm->start_stack = (unsigned long )sp; - +#ifdef FLAT_PLAT_INIT + FLAT_PLAT_INIT(regs); +#endif DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", (int)regs, (int)start_addr, (int)current->mm->start_stack); diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 756205314c24..8d7e88e02e0f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (bprm->misc_bang) goto _ret; - bprm->misc_bang = 1; - /* to keep locking time low, we copy the interpreter string */ read_lock(&entries_lock); fmt = check_file(bprm); @@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) if (retval < 0) goto _error; + bprm->misc_bang = 1; + retval = search_binary_handler (bprm, regs); if (retval < 0) goto _error; @@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct */ bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) { - struct biovec_slab *bp = bvec_slabs + *idx; - - memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec)); - } + if (bvl) + memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); return bvl; } @@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) goto out; } bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; + bio->bi_max_vecs = bvec_nr_vecs(idx); } bio->bi_io_vec = bvl; } @@ -472,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) kfree(bmd); } -static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) +static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, + gfp_t gfp_mask) { - struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); + struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); if (!bmd) return NULL; - bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); + bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); if (!bmd->iovecs) { kfree(bmd); return NULL; } - bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); + bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); if (bmd->sgvecs) return bmd; @@ -494,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) return NULL; } -static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, - int uncopy) +static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, + struct sg_iovec *iov, int iov_count, int uncopy) { int ret = 0, i; struct bio_vec *bvec; @@ -505,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, __bio_for_each_segment(bvec, bio, i, 0) { char *bv_addr = page_address(bvec->bv_page); - unsigned int bv_len = bvec->bv_len; + unsigned int bv_len = iovecs[i].bv_len; while (bv_len && iov_idx < iov_count) { unsigned int bytes; @@ -557,7 +555,7 @@ int bio_uncopy_user(struct bio *bio) struct bio_map_data *bmd = bio->bi_private; int ret; - ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); bio_free_map_data(bmd); bio_put(bio); @@ -599,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, len += iov[i].iov_len; } - bmd = bio_alloc_map_data(nr_pages, iov_count); + bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); if (!bmd) return ERR_PTR(-ENOMEM); @@ -636,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, * success */ if (!write_to_vm) { - ret = __bio_copy_iov(bio, iov, iov_count, 0); + ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); if (ret) goto cleanup; } @@ -945,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) { struct bio_vec *bvec; const int read = bio_data_dir(bio) == READ; - char *p = bio->bi_private; + struct bio_map_data *bmd = bio->bi_private; int i; + char *p = bmd->sgvecs[0].iov_base; __bio_for_each_segment(bvec, bio, i, 0) { char *addr = page_address(bvec->bv_page); + int len = bmd->iovecs[i].bv_len; if (read && !err) - memcpy(p, addr, bvec->bv_len); + memcpy(p, addr, len); __free_page(bvec->bv_page); - p += bvec->bv_len; + p += len; } + bio_free_map_data(bmd); bio_put(bio); } @@ -981,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, const int nr_pages = end - start; struct bio *bio; struct bio_vec *bvec; + struct bio_map_data *bmd; int i, ret; + struct sg_iovec iov; + + iov.iov_base = data; + iov.iov_len = len; + + bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); + if (!bmd) + return ERR_PTR(-ENOMEM); + ret = -ENOMEM; bio = bio_alloc(gfp_mask, nr_pages); if (!bio) - return ERR_PTR(-ENOMEM); + goto out_bmd; while (len) { struct page *page; @@ -1019,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, } } - bio->bi_private = data; + bio->bi_private = bmd; bio->bi_end_io = bio_copy_kern_endio; + + bio_set_map_data(bmd, bio, &iov, 1); return bio; cleanup: bio_for_each_segment(bvec, bio, i) __free_page(bvec->bv_page); bio_put(bio); +out_bmd: + bio_free_map_data(bmd); return ERR_PTR(ret); } diff --git a/fs/buffer.c b/fs/buffer.c index 4dbe52948e8f..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1720,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { + } else if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } @@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(!buffer_mapped(bh)); BUG_ON(!bh->b_end_io); - if (buffer_ordered(bh) && (rw == WRITE)) - rw = WRITE_BARRIER; + /* + * Mask in barrier bit for a write (could be either a WRITE or a + * WRITE_SYNC + */ + if (buffer_ordered(bh) && (rw & WRITE)) + rw |= WRITE_BARRIER; /* - * Only clear out a write error when rewriting, should this - * include WRITE_SYNC as well? + * Only clear out a write error when rewriting */ - if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) + if (test_set_buffer_req(bh) && (rw & WRITE)) clear_buffer_write_io_error(bh); /* @@ -3000,7 +3003,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); - else if (test_set_buffer_locked(bh)) + else if (!trylock_buffer(bh)) continue; if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1f3465201fdf..f9e4ad97a79e 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,16 @@ +Version 1.54 +------------ +Fix premature write failure on congested networks (we would give up +on EAGAIN from the socket too quickly on large writes). +Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. +Fix endian problems in acl (mode from/to cifs acl) on bigendian +architectures. Fix problems with preserving timestamps on copying open +files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit +on parent directory when server supports Unix Extensions but not POSIX +create. Update cifs.upcall version to handle new Kerberos sec flags +(this requires update of cifs.upcall program from Samba). Fix memory leak +on dns_upcall (resolving DFS referralls). + Version 1.53 ------------ DFS support added (Microsoft Distributed File System client support needed diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..68b5c1169d9d 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -642,8 +642,30 @@ The statistics for the number of total SMBs and oplock breaks are different in that they represent all for that share, not just those for which the server returned success. -Also note that "cat /proc/fs/cifs/DebugData" will display information about +Also note that "cat /proc/fs/cifs/DebugData" will display information about the active sessions and the shares that are mounted. -Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is -on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and -LANMAN support do not require this helper. + +Enabling Kerberos (extended security) works but requires version 1.2 or later +of the helper program cifs.upcall to be present and to be configured in the +/etc/request-key.conf file. The cifs.upcall helper program is from the Samba +project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not +require this helper. Note that NTLMv2 security (which does not require the +cifs.upcall helper program), instead of using Kerberos, is sufficient for +some use cases. + +Enabling DFS support (used to access shares transparently in an MS-DFS +global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In +addition, DFS support for target shares which are specified as UNC +names which begin with host names (rather than IP addresses) requires +a user space helper (such as cifs.upcall) to be present in order to +translate host names to ip address, and the user space helper must also +be configured in the file /etc/request-key.conf + +To use cifs Kerberos and DFS support, the Linux keyutils package should be +installed and something like the following lines should be added to the +/etc/request-key.conf file: + +create cifs.spnego * * /usr/local/sbin/cifs.upcall %k +create dns_resolver * * /usr/local/sbin/cifs.upcall %k + + diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 6bb440b257b0..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, unsigned int cls, con, tag, oidlen, rc; bool use_ntlmssp = false; bool use_kerberos = false; + bool use_mskerberos = false; *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ @@ -483,6 +484,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, asn1_open(&ctx, security_blob, length); + /* GSSAPI header */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit header")); return 0; @@ -490,156 +492,149 @@ decode_negTokenInit(unsigned char *security_blob, int length, || (tag != ASN1_EOC)) { cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); return 0; - } else { - /* remember to free obj->oid */ - rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); - if (rc) { - if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { - rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); - if (rc) { - rc = compare_oid(oid, oidlen, - SPNEGO_OID, - SPNEGO_OID_LEN); - kfree(oid); - } - } else - rc = 0; - } + } - if (!rc) { - cFYI(1, ("Error decoding negTokenInit header")); - return 0; - } + /* Check for SPNEGO OID -- remember to free obj->oid */ + rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); + if (rc) { + if ((tag == ASN1_OJI) && (con == ASN1_PRI) && + (cls == ASN1_UNI)) { + rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); + if (rc) { + rc = compare_oid(oid, oidlen, SPNEGO_OID, + SPNEGO_OID_LEN); + kfree(oid); + } + } else + rc = 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding negTokenInit")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON) - || (tag != ASN1_EOC)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end)); - return 0; - } + /* SPNEGO OID not present or garbled -- bail out */ + if (!rc) { + cFYI(1, ("Error decoding negTokenInit header")); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding negTokenInit")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding negTokenInit")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON) + || (tag != ASN1_EOC)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 0", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding 2nd part of negTokenInit")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON) - || (tag != ASN1_EOC)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding negTokenInit")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 1", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode - (&ctx, &sequence_end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding 2nd part of negTokenInit")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding 2nd part of negTokenInit")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON) + || (tag != ASN1_EOC)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 0", + cls, con, tag, end, *end)); + return 0; + } - while (!asn1_eoc_decode(&ctx, sequence_end)) { - rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); - if (!rc) { - cFYI(1, - ("Error decoding negTokenInit hdr exit2")); - return 0; - } - if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { - if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { - - cFYI(1, - ("OID len = %d oid = 0x%lx 0x%lx " - "0x%lx 0x%lx", - oidlen, *oid, *(oid + 1), - *(oid + 2), *(oid + 3))); - - if (compare_oid(oid, oidlen, - MSKRB5_OID, - MSKRB5_OID_LEN)) - use_kerberos = true; - else if (compare_oid(oid, oidlen, - KRB5_OID, - KRB5_OID_LEN)) - use_kerberos = true; - else if (compare_oid(oid, oidlen, - NTLMSSP_OID, - NTLMSSP_OID_LEN)) - use_ntlmssp = true; - - kfree(oid); - } - } else { - cFYI(1, ("Should be an oid what is going on?")); - } - } + if (asn1_header_decode + (&ctx, &sequence_end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding 2nd part of negTokenInit")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 1", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit3")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ + while (!asn1_eoc_decode(&ctx, sequence_end)) { + rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); + if (!rc) { cFYI(1, - ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); + ("Error decoding negTokenInit hdr exit2")); return 0; } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit5")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); + if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { + if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { + + cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx " + "0x%lx 0x%lx", oidlen, *oid, + *(oid + 1), *(oid + 2), *(oid + 3))); + + if (compare_oid(oid, oidlen, MSKRB5_OID, + MSKRB5_OID_LEN) && + !use_kerberos) + use_mskerberos = true; + else if (compare_oid(oid, oidlen, KRB5_OID, + KRB5_OID_LEN) && + !use_mskerberos) + use_kerberos = true; + else if (compare_oid(oid, oidlen, NTLMSSP_OID, + NTLMSSP_OID_LEN)) + use_ntlmssp = true; + + kfree(oid); + } + } else { + cFYI(1, ("Should be an oid what is going on?")); } + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit 7")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, - ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); - return 0; - } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit9")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, - ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); - return 0; - } - cFYI(1, ("Need to call asn1_octets_decode() function for %s", - ctx.pointer)); /* is this UTF-8 or ASCII? */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit3")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + /* tag = 3 indicating mechListMIC */ + cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; + } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit5")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + } + + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit 7")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; + } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit9")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) + || (tag != ASN1_GENSTR)) { + cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; } + cFYI(1, ("Need to call asn1_octets_decode() function for %s", + ctx.pointer)); /* is this UTF-8 or ASCII? */ if (use_kerberos) *secType = Kerberos; + else if (use_mskerberos) + *secType = MSKerberos; else if (use_ntlmssp) *secType = NTLMSSP; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 688a2d42153f..69a12aae91d3 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if (mid_entry) { - cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid)); + cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid)); #ifdef CONFIG_CIFS_STATS2 - cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", - mid_entry->largeBuf, - mid_entry->resp_buf, - mid_entry->when_received, - jiffies)); + cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", + mid_entry->largeBuf, + mid_entry->resp_buf, + mid_entry->when_received, + jiffies)); #endif /* STATS2 */ - cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, - mid_entry->multiEnd)); - if (mid_entry->resp_buf) { - cifs_dump_detail(mid_entry->resp_buf); - cifs_dump_mem("existing buf: ", - mid_entry->resp_buf, 62); - } + cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, + mid_entry->multiEnd)); + if (mid_entry->resp_buf) { + cifs_dump_detail(mid_entry->resp_buf); + cifs_dump_mem("existing buf: ", + mid_entry->resp_buf, 62); } } spin_unlock(&GlobalMid_Lock); @@ -163,16 +161,13 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) mid_entry = list_entry(tmp1, struct mid_q_entry, qhead); - if (mid_entry) { - seq_printf(m, - "State: %d com: %d pid:" - " %d tsk: %p mid %d\n", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid); - } + seq_printf(m, "State: %d com: %d pid:" + " %d tsk: %p mid %d\n", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid); } spin_unlock(&GlobalMid_Lock); } diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 7013aaff6aed..117ef4bba68e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = { .describe = user_describe, }; -#define MAX_VER_STR_LEN 9 /* length of longest version string e.g. - strlen(";ver=0xFF") */ +#define MAX_VER_STR_LEN 8 /* length of longest version string e.g. + strlen("ver=0xFF") */ #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg in future could have strlen(";sec=ntlmsspi") */ #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ @@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) struct key *spnego_key; const char *hostname = server->hostname; - /* BB: come up with better scheme for determining length */ - /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host= - hostname sec=mechanism uid=0x uid */ - desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 + - strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2); + /* length of fields (with semicolons): ver=0xyz ip4=ipaddress + host=hostname sec=mechanism uid=0xFF user=username */ + desc_len = MAX_VER_STR_LEN + + 6 /* len of "host=" */ + strlen(hostname) + + 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + + MAX_MECH_STR_LEN + + 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + + 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; + spnego_key = ERR_PTR(-ENOMEM); description = kzalloc(desc_len, GFP_KERNEL); if (description == NULL) @@ -110,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) dp = description + strlen(description); - /* for now, only sec=krb5 is valid */ + /* for now, only sec=krb5 and sec=mskrb5 are valid */ if (server->secType == Kerberos) sprintf(dp, ";sec=krb5"); + else if (server->secType == MSKerberos) + sprintf(dp, ";sec=mskrb5"); else goto out; diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h @@ -23,7 +23,7 @@ #ifndef _CIFS_SPNEGO_H #define _CIFS_SPNEGO_H -#define CIFS_SPNEGO_UPCALL_VERSION 1 +#define CIFS_SPNEGO_UPCALL_VERSION 2 /* * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 1ec7076f7b24..25ecbd5b0404 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -175,6 +175,8 @@ out_no_root: if (inode) iput(inode); + cifs_umount(sb, cifs_sb); + out_mount_failed: if (cifs_sb) { #ifdef CONFIG_CIFS_DFS_UPCALL @@ -930,36 +932,34 @@ static int cifs_oplock_thread(void *dummyarg) schedule_timeout(39*HZ); } else { oplock_item = list_entry(GlobalOplock_Q.next, - struct oplock_q_entry, qhead); - if (oplock_item) { - cFYI(1, ("found oplock item to write out")); - pTcon = oplock_item->tcon; - inode = oplock_item->pinode; - netfid = oplock_item->netfid; - spin_unlock(&GlobalMid_Lock); - DeleteOplockQEntry(oplock_item); - /* can not grab inode sem here since it would + struct oplock_q_entry, qhead); + cFYI(1, ("found oplock item to write out")); + pTcon = oplock_item->tcon; + inode = oplock_item->pinode; + netfid = oplock_item->netfid; + spin_unlock(&GlobalMid_Lock); + DeleteOplockQEntry(oplock_item); + /* can not grab inode sem here since it would deadlock when oplock received on delete since vfs_unlink holds the i_mutex across the call */ - /* mutex_lock(&inode->i_mutex);*/ - if (S_ISREG(inode->i_mode)) { - rc = - filemap_fdatawrite(inode->i_mapping); - if (CIFS_I(inode)->clientCanCacheRead - == 0) { - waitrc = filemap_fdatawait(inode->i_mapping); - invalidate_remote_inode(inode); - } - if (rc == 0) - rc = waitrc; - } else - rc = 0; - /* mutex_unlock(&inode->i_mutex);*/ - if (rc) - CIFS_I(inode)->write_behind_rc = rc; - cFYI(1, ("Oplock flush inode %p rc %d", - inode, rc)); + /* mutex_lock(&inode->i_mutex);*/ + if (S_ISREG(inode->i_mode)) { + rc = filemap_fdatawrite(inode->i_mapping); + if (CIFS_I(inode)->clientCanCacheRead == 0) { + waitrc = filemap_fdatawait( + inode->i_mapping); + invalidate_remote_inode(inode); + } + if (rc == 0) + rc = waitrc; + } else + rc = 0; + /* mutex_unlock(&inode->i_mutex);*/ + if (rc) + CIFS_I(inode)->write_behind_rc = rc; + cFYI(1, ("Oplock flush inode %p rc %d", + inode, rc)); /* releasing stale oplock after recent reconnect of smb session using a now incorrect file @@ -967,15 +967,13 @@ static int cifs_oplock_thread(void *dummyarg) not bother sending an oplock release if session to server still is disconnected since oplock already released by the server in that case */ - if (pTcon->tidStatus != CifsNeedReconnect) { - rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, - 0, LOCKING_ANDX_OPLOCK_RELEASE, - false /* wait flag */); - cFYI(1, ("Oplock release rc = %d", rc)); - } - } else - spin_unlock(&GlobalMid_Lock); + if (pTcon->tidStatus != CifsNeedReconnect) { + rc = CIFSSMBLock(0, pTcon, netfid, + 0 /* len */ , 0 /* offset */, 0, + 0, LOCKING_ANDX_OPLOCK_RELEASE, + false /* wait flag */); + cFYI(1, ("Oplock release rc = %d", rc)); + } set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); /* yield in case q were corrupt */ } @@ -1001,8 +999,7 @@ static int cifs_dnotify_thread(void *dummyarg) list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses && ses->server && - atomic_read(&ses->server->inFlight)) + if (ses->server && atomic_read(&ses->server->inFlight)) wake_up_all(&ses->server->response_q); } read_unlock(&GlobalSMBSeslock); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 25a6cbd15529..135c965c4137 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.53" +#define CIFS_VERSION "1.54" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..8dfd6f24d488 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -80,7 +80,8 @@ enum securityEnum { NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO */ NTLMSSP, /* NTLMSSP via SPNEGO */ - Kerberos /* Kerberos via SPNEGO */ + Kerberos, /* Kerberos via SPNEGO */ + MSKerberos, /* MS Kerberos via SPNEGO */ }; enum protocolEnum { diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 409abce12732..d2a073edd1b8 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -262,7 +262,7 @@ */ #define CIFS_NO_HANDLE 0xFFFF -#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) +#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL #define NO_CHANGE_32 0xFFFFFFFFUL /* IPC$ in ASCII */ diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b9f5e935f821..a729d083e6f4 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData); -extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, +extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, - const FILE_BASIC_INFO *data, __u16 fid); +extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const FILE_BASIC_INFO *data, __u16 fid, + __u32 pid_of_opener); #if 0 extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName, __u16 dos_attributes, @@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fileHandle, __u32 opener_pid, bool AllocSizeFlag); -extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, - char *full_path, __u64 mode, __u64 uid, - __u64 gid, dev_t dev, + +struct cifs_unix_set_info_args { + __u64 ctime; + __u64 atime; + __u64 mtime; + __u64 mode; + __u64 uid; + __u64 gid; + dev_t device; +}; + +extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, + char *fileName, + const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, int remap_special_chars); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c621ffa2ca90..994de7c90474 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) write_lock(&GlobalSMBSeslock); list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); - if (open_file) - open_file->invalidHandle = true; + open_file->invalidHandle = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted @@ -4816,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, time and resort to the original setpathinfo level which takes the ancient DOS time format with 2 second granularity */ int -CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, - const FILE_BASIC_INFO *data, __u16 fid) +CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; char *data_offset; @@ -4830,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - /* At this point there is no need to override the current pid - with the pid of the opener, but that could change if we someday - use an existing handle (rather than opening one on the fly) */ - /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); - pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/ + pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); params = 6; pSMB->MaxSetupCount = 0; @@ -4882,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, int -CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, - const FILE_BASIC_INFO *data, - const struct nls_table *nls_codepage, int remap) +CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, + const char *fileName, const FILE_BASIC_INFO *data, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -5013,10 +5009,9 @@ SetAttrLgcyRetry: #endif /* temporarily unneeded SetAttr legacy function */ int -CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, - char *fileName, __u64 mode, __u64 uid, __u64 gid, - dev_t device, const struct nls_table *nls_codepage, - int remap) +CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, + const struct cifs_unix_set_info_args *args, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -5025,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, int bytes_returned = 0; FILE_UNIX_BASIC_INFO *data_offset; __u16 params, param_offset, offset, count, byte_count; + __u64 mode = args->mode; cFYI(1, ("In SetUID/GID/Mode")); setPermsRetry: @@ -5080,16 +5076,16 @@ setPermsRetry: set file size and do not want to truncate file size to zero accidently as happened on one Samba server beta by putting zero instead of -1 here */ - data_offset->EndOfFile = NO_CHANGE_64; - data_offset->NumOfBytes = NO_CHANGE_64; - data_offset->LastStatusChange = NO_CHANGE_64; - data_offset->LastAccessTime = NO_CHANGE_64; - data_offset->LastModificationTime = NO_CHANGE_64; - data_offset->Uid = cpu_to_le64(uid); - data_offset->Gid = cpu_to_le64(gid); + data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); + data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); + data_offset->LastStatusChange = cpu_to_le64(args->ctime); + data_offset->LastAccessTime = cpu_to_le64(args->atime); + data_offset->LastModificationTime = cpu_to_le64(args->mtime); + data_offset->Uid = cpu_to_le64(args->uid); + data_offset->Gid = cpu_to_le64(args->gid); /* better to leave device as zero when it is */ - data_offset->DevMajor = cpu_to_le64(MAJOR(device)); - data_offset->DevMinor = cpu_to_le64(MINOR(device)); + data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); + data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); data_offset->Permissions = cpu_to_le64(mode); if (S_ISREG(mode)) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b51d5777cde6..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } list_for_each(tmp, &GlobalTreeConnectionList) { tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) + if ((tcon->ses) && (tcon->ses->server == server)) tcon->tidStatus = CifsNeedReconnect; } read_unlock(&GlobalSMBSeslock); @@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if (mid_entry) { - if (mid_entry->midState == MID_REQUEST_SUBMITTED) { + if (mid_entry->midState == MID_REQUEST_SUBMITTED) { /* Mark other intransit requests as needing retry so we do not immediately mark the session bad again (ie after we reconnect below) as they timeout too */ - mid_entry->midState = MID_RETRY_NEEDED; - } + mid_entry->midState = MID_RETRY_NEEDED; } } spin_unlock(&GlobalMid_Lock); @@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) current->flags |= PF_MEMALLOC; cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); - write_lock(&GlobalSMBSeslock); - atomic_inc(&tcpSesAllocCount); - length = tcpSesAllocCount.counter; - write_unlock(&GlobalSMBSeslock); - if (length > 1) + + length = atomic_inc_return(&tcpSesAllocCount); + if (length > 1) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -745,14 +741,11 @@ multi_t2_fnd: coming home not much else we can do but free the memory */ } - write_lock(&GlobalSMBSeslock); - atomic_dec(&tcpSesAllocCount); - length = tcpSesAllocCount.counter; - /* last chance to mark ses pointers invalid if there are any pointing to this (e.g if a crazy root user tried to kill cifsd kernel thread explicitly this might happen) */ + write_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); @@ -763,6 +756,8 @@ multi_t2_fnd: kfree(server->hostname); kfree(server); + + length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -3603,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, char ntlm_session_key[CIFS_SESS_KEY_SIZE]; bool ntlmv2_flag = false; int first_time = 0; + struct TCP_Server_Info *server = pSesInfo->server; /* what if server changes its buffer size after dropping the session? */ - if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { + if (server->maxBuf == 0) /* no need to send on reconnect */ { rc = CIFSSMBNegotiate(xid, pSesInfo); - if (rc == -EAGAIN) /* retry only once on 1st time connection */ { + if (rc == -EAGAIN) { + /* retry only once on 1st time connection */ rc = CIFSSMBNegotiate(xid, pSesInfo); if (rc == -EAGAIN) rc = -EHOSTDOWN; } if (rc == 0) { spin_lock(&GlobalMid_Lock); - if (pSesInfo->server->tcpStatus != CifsExiting) - pSesInfo->server->tcpStatus = CifsGood; + if (server->tcpStatus != CifsExiting) + server->tcpStatus = CifsGood; else rc = -EHOSTDOWN; spin_unlock(&GlobalMid_Lock); @@ -3623,97 +3620,90 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, } first_time = 1; } - if (!rc) { - pSesInfo->flags = 0; - pSesInfo->capabilities = pSesInfo->server->capabilities; - if (linuxExtEnabled == 0) - pSesInfo->capabilities &= (~CAP_UNIX); + + if (rc) + goto ss_err_exit; + + pSesInfo->flags = 0; + pSesInfo->capabilities = server->capabilities; + if (linuxExtEnabled == 0) + pSesInfo->capabilities &= (~CAP_UNIX); /* pSesInfo->sequence_number = 0;*/ - cFYI(1, - ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", - pSesInfo->server->secMode, - pSesInfo->server->capabilities, - pSesInfo->server->timeAdj)); - if (experimEnabled < 2) - rc = CIFS_SessSetup(xid, pSesInfo, - first_time, nls_info); - else if (extended_security - && (pSesInfo->capabilities - & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == NTLMSSP)) { - rc = -EOPNOTSUPP; - } else if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == RawNTLMSSP)) { - cFYI(1, ("NTLMSSP sesssetup")); - rc = CIFSNTLMSSPNegotiateSessSetup(xid, - pSesInfo, - &ntlmv2_flag, - nls_info); - if (!rc) { - if (ntlmv2_flag) { - char *v2_response; - cFYI(1, ("more secure NTLM ver2 hash")); - if (CalcNTLMv2_partial_mac_key(pSesInfo, - nls_info)) { - rc = -ENOMEM; - goto ss_err_exit; - } else - v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); - if (v2_response) { - CalcNTLMv2_response(pSesInfo, - v2_response); - /* if (first_time) - cifs_calculate_ntlmv2_mac_key( - pSesInfo->server->mac_signing_key, - response, ntlm_session_key,*/ - kfree(v2_response); + cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", + server->secMode, server->capabilities, server->timeAdj)); + + if (experimEnabled < 2) + rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); + else if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (server->secType == NTLMSSP)) { + rc = -EOPNOTSUPP; + } else if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (server->secType == RawNTLMSSP)) { + cFYI(1, ("NTLMSSP sesssetup")); + rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, + nls_info); + if (!rc) { + if (ntlmv2_flag) { + char *v2_response; + cFYI(1, ("more secure NTLM ver2 hash")); + if (CalcNTLMv2_partial_mac_key(pSesInfo, + nls_info)) { + rc = -ENOMEM; + goto ss_err_exit; + } else + v2_response = kmalloc(16 + 64 /* blob*/, + GFP_KERNEL); + if (v2_response) { + CalcNTLMv2_response(pSesInfo, + v2_response); + /* if (first_time) + cifs_calculate_ntlmv2_mac_key */ + kfree(v2_response); /* BB Put dummy sig in SessSetup PDU? */ - } else { - rc = -ENOMEM; - goto ss_err_exit; - } - } else { - SMBNTencrypt(pSesInfo->password, - pSesInfo->server->cryptKey, - ntlm_session_key); - - if (first_time) - cifs_calculate_mac_key( - &pSesInfo->server->mac_signing_key, - ntlm_session_key, - pSesInfo->password); + rc = -ENOMEM; + goto ss_err_exit; } + + } else { + SMBNTencrypt(pSesInfo->password, + server->cryptKey, + ntlm_session_key); + + if (first_time) + cifs_calculate_mac_key( + &server->mac_signing_key, + ntlm_session_key, + pSesInfo->password); + } /* for better security the weaker lanman hash not sent in AuthSessSetup so we no longer calculate it */ - rc = CIFSNTLMSSPAuthSessSetup(xid, - pSesInfo, - ntlm_session_key, - ntlmv2_flag, - nls_info); - } - } else { /* old style NTLM 0.12 session setup */ - SMBNTencrypt(pSesInfo->password, - pSesInfo->server->cryptKey, - ntlm_session_key); + rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo, + ntlm_session_key, + ntlmv2_flag, + nls_info); + } + } else { /* old style NTLM 0.12 session setup */ + SMBNTencrypt(pSesInfo->password, server->cryptKey, + ntlm_session_key); - if (first_time) - cifs_calculate_mac_key( - &pSesInfo->server->mac_signing_key, - ntlm_session_key, pSesInfo->password); + if (first_time) + cifs_calculate_mac_key(&server->mac_signing_key, + ntlm_session_key, + pSesInfo->password); - rc = CIFSSessSetup(xid, pSesInfo, - ntlm_session_key, nls_info); - } - if (rc) { - cERROR(1, ("Send error in SessSetup = %d", rc)); - } else { - cFYI(1, ("CIFS Session Established successfully")); + rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); + } + if (rc) { + cERROR(1, ("Send error in SessSetup = %d", rc)); + } else { + cFYI(1, ("CIFS Session Established successfully")); pSesInfo->status = CifsGood; - } } + ss_err_exit: return rc; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index fb69c1fa85c9..e962e75e6f7b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, - (__u64)current->fsuid, - (__u64)current->fsgid, - 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64) current->fsuid; + if (inode->i_mode & S_ISGID) + args.gid = (__u64) inode->i_gid; + else + args.gid = (__u64) current->fsgid; } else { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, - (__u64)-1, - (__u64)-1, - 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { /* BB implement mode setting via Windows security descriptors e.g. */ @@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current->fsuid; - newinode->i_gid = current->fsgid; + if (inode->i_mode & S_ISGID) + newinode->i_gid = + inode->i_gid; + else + newinode->i_gid = + current->fsgid; } } } @@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, if (full_path == NULL) rc = -ENOMEM; else if (pTcon->unix_ext) { - mode &= ~current->fs->umask; + struct cifs_unix_set_info_args args = { + .mode = mode & ~current->fs->umask, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = device_number, + }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, (__u64)current->fsuid, - (__u64)current->fsgid, - device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64) current->fsuid; + args.gid = (__u64) current->fsgid; } else { - rc = CIFSSMBUnixSetPerms(xid, pTcon, - full_path, mode, (__u64)-1, (__u64)-1, - device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, + &args, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { rc = cifs_get_inode_info_unix(&newinode, full_path, diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..a2e0673e1b08 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data, return rc; } +static void +dns_resolver_destroy(struct key *key) +{ + kfree(key->payload.data); +} + struct key_type key_type_dns_resolver = { .name = "dns_resolver", .def_datalen = sizeof(struct in_addr), .describe = user_describe, .instantiate = dns_resolver_instantiate, + .destroy = dns_resolver_destroy, .match = user_match, }; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0aac824371a5..ff14d14903a0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file) /* time to set mode which we can not set earlier due to problems creating new read-only files */ if (pTcon->unix_ext) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - inode->i_mode, - (__u64)-1, (__u64)-1, 0 /* dev */, + struct cifs_unix_set_info_args args = { + .mode = inode->i_mode, + .uid = NO_CHANGE_64, + .gid = NO_CHANGE_64, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } else { - /* BB implement via Windows security descriptors eg - CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, - -1, -1, local_nls); - in the meantime could set r/o dos attribute when - perms are eg: mode & 0222 == 0 */ } } @@ -1280,7 +1281,7 @@ retry: if (first < 0) lock_page(page); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) break; if (unlikely(page->mapping != mapping)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 46e54d39461d..9c548f110102 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, if ((inode->i_mode & S_IWUGO) == 0 && (attr & ATTR_READONLY) == 0) inode->i_mode |= (S_IWUGO & default_mode); - inode->i_mode &= ~S_IFMT; + + inode->i_mode &= ~S_IFMT; } /* clear write bits if ATTR_READONLY is set */ if (attr & ATTR_READONLY) @@ -649,6 +650,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + } else if (rc) { _FreeXid(xid); iget_failed(inode); return ERR_PTR(rc); @@ -737,7 +739,7 @@ psx_del_no_retry: /* ATTRS set to normal clears r/o bit */ pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); if (!(pTcon->ses->flags & CIFS_SES_NT4)) - rc = CIFSSMBSetTimes(xid, pTcon, full_path, + rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, pinfo_buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -767,9 +769,10 @@ psx_del_no_retry: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc == 0) { - rc = CIFSSMBSetFileTimes(xid, pTcon, - pinfo_buf, - netfid); + rc = CIFSSMBSetFileInfo(xid, pTcon, + pinfo_buf, + netfid, + current->tgid); CIFSSMBClose(xid, pTcon, netfid); } } @@ -984,32 +987,41 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; + mode &= ~current->fs->umask; + /* must turn on setgid bit if parent dir has it */ + if (inode->i_mode & S_ISGID) + mode |= S_ISGID; + if (pTcon->unix_ext) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, - (__u64)current->fsuid, - (__u64)current->fsgid, - 0 /* dev_t */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64)current->fsuid; + if (inode->i_mode & S_ISGID) + args.gid = (__u64)inode->i_gid; + else + args.gid = (__u64)current->fsgid; } else { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, (__u64)-1, - (__u64)-1, 0 /* dev_t */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0) { FILE_BASIC_INFO pInfo; memset(&pInfo, 0, sizeof(pInfo)); pInfo.Attributes = cpu_to_le32(ATTR_READONLY); - CIFSSMBSetTimes(xid, pTcon, full_path, + CIFSSMBSetPathInfo(xid, pTcon, full_path, &pInfo, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1024,8 +1036,12 @@ mkdir_get_info: CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = current->fsuid; - direntry->d_inode->i_gid = - current->fsgid; + if (inode->i_mode & S_ISGID) + direntry->d_inode->i_gid = + inode->i_gid; + else + direntry->d_inode->i_gid = + current->fsgid; } } } @@ -1310,10 +1326,11 @@ int cifs_revalidate(struct dentry *direntry) /* if (S_ISDIR(direntry->d_inode->i_mode)) shrink_dcache_parent(direntry); */ if (S_ISREG(direntry->d_inode->i_mode)) { - if (direntry->d_inode->i_mapping) + if (direntry->d_inode->i_mapping) { wbrc = filemap_fdatawait(direntry->d_inode->i_mapping); if (wbrc) CIFS_I(direntry->d_inode)->write_behind_rc = wbrc; + } /* may eventually have to do this for open files too */ if (list_empty(&(cifsInode->openFileList))) { /* changed on server - flush read ahead pages */ @@ -1489,30 +1506,228 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, return rc; } -int cifs_setattr(struct dentry *direntry, struct iattr *attrs) +static int +cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, + char *full_path, __u32 dosattr) { + int rc; + int oplock = 0; + __u16 netfid; + __u32 netpid; + bool set_time = false; + struct cifsFileInfo *open_file; + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + FILE_BASIC_INFO info_buf; + + if (attrs->ia_valid & ATTR_ATIME) { + set_time = true; + info_buf.LastAccessTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); + } else + info_buf.LastAccessTime = 0; + + if (attrs->ia_valid & ATTR_MTIME) { + set_time = true; + info_buf.LastWriteTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); + } else + info_buf.LastWriteTime = 0; + + /* + * Samba throws this field away, but windows may actually use it. + * Do not set ctime unless other time stamps are changed explicitly + * (i.e. by utimes()) since we would then have a mix of client and + * server times. + */ + if (set_time && (attrs->ia_valid & ATTR_CTIME)) { + cFYI(1, ("CIFS - CTIME changed")); + info_buf.ChangeTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); + } else + info_buf.ChangeTime = 0; + + info_buf.CreationTime = 0; /* don't change */ + info_buf.Attributes = cpu_to_le32(dosattr); + + /* + * If the file is already open for write, just use that fileid + */ + open_file = find_writable_file(cifsInode); + if (open_file) { + netfid = open_file->netfid; + netpid = open_file->pid; + goto set_via_filehandle; + } + + /* + * NT4 apparently returns success on this call, but it doesn't + * really work. + */ + if (!(pTcon->ses->flags & CIFS_SES_NT4)) { + rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, + &info_buf, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc != -EOPNOTSUPP && rc != -EINVAL) + goto out; + } + + cFYI(1, ("calling SetFileInfo since SetPathInfo for " + "times not supported by this server")); + rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, + SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, + CREATE_NOT_DIR, &netfid, &oplock, + NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc != 0) { + if (rc == -EIO) + rc = -EINVAL; + goto out; + } + + netpid = current->tgid; + +set_via_filehandle: + rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); + if (open_file == NULL) + CIFSSMBClose(xid, pTcon, netfid); + else + atomic_dec(&open_file->wrtPending); +out: + return rc; +} + +static int +cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) +{ + int rc; int xid; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; char *full_path = NULL; - int rc = -EACCES; - FILE_BASIC_INFO time_buf; - bool set_time = false; - bool set_dosattr = false; - __u64 mode = 0xFFFFFFFFFFFFFFFFULL; - __u64 uid = 0xFFFFFFFFFFFFFFFFULL; - __u64 gid = 0xFFFFFFFFFFFFFFFFULL; - struct cifsInodeInfo *cifsInode; struct inode *inode = direntry->d_inode; + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + struct cifs_unix_set_info_args *args = NULL; + + cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", + direntry->d_name.name, attrs->ia_valid)); + + xid = GetXid(); + + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { + /* check if we have permission to change attrs */ + rc = inode_change_ok(inode, attrs); + if (rc < 0) + goto out; + else + rc = 0; + } + + full_path = build_path_from_dentry(direntry); + if (full_path == NULL) { + rc = -ENOMEM; + goto out; + } + + if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { + /* + Flush data before changing file size or changing the last + write time of the file on the server. If the + flush returns error, store it to report later and continue. + BB: This should be smarter. Why bother flushing pages that + will be truncated anyway? Also, should we error out here if + the flush returns error? + */ + rc = filemap_write_and_wait(inode->i_mapping); + if (rc != 0) { + cifsInode->write_behind_rc = rc; + rc = 0; + } + } + + if (attrs->ia_valid & ATTR_SIZE) { + rc = cifs_set_file_size(inode, attrs, xid, full_path); + if (rc != 0) + goto out; + } + + /* skip mode change if it's just for clearing setuid/setgid */ + if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) + attrs->ia_valid &= ~ATTR_MODE; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (args == NULL) { + rc = -ENOMEM; + goto out; + } + + /* set up the struct */ + if (attrs->ia_valid & ATTR_MODE) + args->mode = attrs->ia_mode; + else + args->mode = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_UID) + args->uid = attrs->ia_uid; + else + args->uid = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_GID) + args->gid = attrs->ia_gid; + else + args->gid = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_ATIME) + args->atime = cifs_UnixTimeToNT(attrs->ia_atime); + else + args->atime = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_MTIME) + args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime); + else + args->mtime = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_CTIME) + args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime); + else + args->ctime = NO_CHANGE_64; + + args->device = 0; + rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (!rc) + rc = inode_setattr(inode, attrs); +out: + kfree(args); + kfree(full_path); + FreeXid(xid); + return rc; +} + +static int +cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) +{ + int xid; + struct inode *inode = direntry->d_inode; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + char *full_path = NULL; + int rc = -EACCES; + __u32 dosattr = 0; + __u64 mode = NO_CHANGE_64; xid = GetXid(); cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid)); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { /* check if we have permission to change attrs */ rc = inode_change_ok(inode, attrs); @@ -1528,7 +1743,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) FreeXid(xid); return -ENOMEM; } - cifsInode = CIFS_I(inode); if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { /* @@ -1559,21 +1773,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) * CIFSACL support + proper Windows to Unix idmapping, we may be * able to support this in the future. */ - if (!pTcon->unix_ext && - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); - } else { - if (attrs->ia_valid & ATTR_UID) { - cFYI(1, ("UID changed to %d", attrs->ia_uid)); - uid = attrs->ia_uid; - } - if (attrs->ia_valid & ATTR_GID) { - cFYI(1, ("GID changed to %d", attrs->ia_gid)); - gid = attrs->ia_gid; - } - } - - time_buf.Attributes = 0; /* skip mode change if it's just for clearing setuid/setgid */ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -1584,13 +1785,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) mode = attrs->ia_mode; } - if ((pTcon->unix_ext) - && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID))) - rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid, - 0 /* dev_t */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - else if (attrs->ia_valid & ATTR_MODE) { + if (attrs->ia_valid & ATTR_MODE) { rc = 0; #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) @@ -1599,24 +1794,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #endif if (((mode & S_IWUGO) == 0) && (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = true; - time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | - ATTR_READONLY); + + dosattr = cifsInode->cifsAttrs | ATTR_READONLY; + /* fix up mode if we're not using dynperm */ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) attrs->ia_mode = inode->i_mode & ~S_IWUGO; } else if ((mode & S_IWUGO) && (cifsInode->cifsAttrs & ATTR_READONLY)) { - /* If file is readonly on server, we would - not be able to write to it - so if any write - bit is enabled for user or group or other we - need to at least try to remove r/o dos attr */ - set_dosattr = true; - time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & - (~ATTR_READONLY)); - /* Windows ignores set to zero */ - if (time_buf.Attributes == 0) - time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); + + dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; + /* Attributes of 0 are ignored */ + if (dosattr == 0) + dosattr |= ATTR_NORMAL; /* reset local inode permissions to normal */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { @@ -1634,82 +1824,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } } - if (attrs->ia_valid & ATTR_ATIME) { - set_time = true; - time_buf.LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); - } else - time_buf.LastAccessTime = 0; - - if (attrs->ia_valid & ATTR_MTIME) { - set_time = true; - time_buf.LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); - } else - time_buf.LastWriteTime = 0; - /* Do not set ctime explicitly unless other time - stamps are changed explicitly (i.e. by utime() - since we would then have a mix of client and - server times */ + if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) || + ((attrs->ia_valid & ATTR_MODE) && dosattr)) { + rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); + /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */ - if (set_time && (attrs->ia_valid & ATTR_CTIME)) { - set_time = true; - /* Although Samba throws this field away - it may be useful to Windows - but we do - not want to set ctime unless some other - timestamp is changing */ - cFYI(1, ("CIFS - CTIME changed")); - time_buf.ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); - } else - time_buf.ChangeTime = 0; - - if (set_time || set_dosattr) { - time_buf.CreationTime = 0; /* do not change */ - /* In the future we should experiment - try setting timestamps - via Handle (SetFileInfo) instead of by path */ - if (!(pTcon->ses->flags & CIFS_SES_NT4)) - rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = -EOPNOTSUPP; - - if (rc == -EOPNOTSUPP) { - int oplock = 0; - __u16 netfid; - - cFYI(1, ("calling SetFileInfo since SetPathInfo for " - "times not supported by this server")); - /* BB we could scan to see if we already have it open - and pass in pid of opener to function */ - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, - CREATE_NOT_DIR, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf, - netfid); - CIFSSMBClose(xid, pTcon, netfid); - } else { - /* BB For even older servers we could convert time_buf - into old DOS style which uses two second - granularity */ - - /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path, - &time_buf, cifs_sb->local_nls); */ - } - } /* Even if error on time set, no sense failing the call if the server would set the time to a reasonable value anyway, and this check ensures that we are not being called from sys_utimes in which case we ought to fail the call back to the user when the server rejects the call */ if ((rc) && (attrs->ia_valid & - (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) + (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) rc = 0; } @@ -1723,6 +1849,21 @@ cifs_setattr_exit: return rc; } +int +cifs_setattr(struct dentry *direntry, struct iattr *attrs) +{ + struct inode *inode = direntry->d_inode; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + + if (pTcon->unix_ext) + return cifs_setattr_unix(direntry, attrs); + + return cifs_setattr_nounix(direntry, attrs); + + /* BB: add cifs_setattr_legacy for really old servers */ +} + #if 0 void cifs_delete_inode(struct inode *inode) { diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..b537fad3bf50 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -505,7 +505,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); } else ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); - } else if (type == Kerberos) { + } else if (type == Kerberos || type == MSKerberos) { #ifdef CONFIG_CIFS_UPCALL struct cifs_spnego_msg *msg; spnego_key = cifs_get_spnego_key(ses); @@ -516,6 +516,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, } msg = spnego_key->payload.data; + /* check version field to make sure that cifs.upcall is + sending us a response in an expected form */ + if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { + cERROR(1, ("incorrect version of cifs.upcall (expected" + " %d but got %d)", + CIFS_SPNEGO_UPCALL_VERSION, msg->version)); + rc = -EKEYREJECTED; + goto ssetup_exit; + } /* bail out if key is too long */ if (msg->sesskey_len > sizeof(ses->server->mac_signing_key.data.krb5)) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 000ac509c98a..e286db9f5ee2 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, cFYI(1, ("Sending smb: total_len %d", total_len)); dump_smb(smb_buffer, len); + i = 0; while (total_len) { rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], n_vec - first_vec, total_len); diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..075d0509970d 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, if (buf->result) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->result = -EOVERFLOW; return -EOVERFLOW; + } buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, if (reclen > buf->count) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; return -EOVERFLOW; + } dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) * Ensure that no racing symlink() will make detach_prep() fail while * the new link is temporarily attached */ - mutex_lock(&configfs_symlink_mutex); - spin_lock(&configfs_dirent_lock); do { struct mutex *wait_mutex; + mutex_lock(&configfs_symlink_mutex); + spin_lock(&configfs_dirent_lock); ret = configfs_detach_prep(dentry, &wait_mutex); - if (ret) { + if (ret) configfs_detach_rollback(dentry); - spin_unlock(&configfs_dirent_lock); - mutex_unlock(&configfs_symlink_mutex); + spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); + + if (ret) { if (ret != -EAGAIN) { config_item_put(parent_item); return ret; @@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) /* Wait until the racing operation terminates */ mutex_lock(wait_mutex); mutex_unlock(wait_mutex); - - mutex_lock(&configfs_symlink_mutex); - spin_lock(&configfs_dirent_lock); } } while (ret == -EAGAIN); - spin_unlock(&configfs_dirent_lock); - mutex_unlock(&configfs_symlink_mutex); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 0c3b618c15b3..f40423eb1a14 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex); static int cramfs_iget5_test(struct inode *inode, void *opaque) { struct cramfs_inode *cramfs_inode = opaque; - - if (inode->i_ino != CRAMINO(cramfs_inode)) - return 0; /* does not match */ - - if (inode->i_ino != 1) - return 1; - - /* all empty directories, char, block, pipe, and sock, share inode #1 */ - - if ((inode->i_mode != cramfs_inode->mode) || - (inode->i_gid != cramfs_inode->gid) || - (inode->i_uid != cramfs_inode->uid)) - return 0; /* does not match */ - - if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && - (inode->i_rdev != old_decode_dev(cramfs_inode->size))) - return 0; /* does not match */ - - return 1; /* matches */ + return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; } static int cramfs_iget5_set(struct inode *inode, void *opaque) { - static struct timespec zerotime; struct cramfs_inode *cramfs_inode = opaque; - inode->i_mode = cramfs_inode->mode; - inode->i_uid = cramfs_inode->uid; - inode->i_size = cramfs_inode->size; - inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; - inode->i_gid = cramfs_inode->gid; - /* Struct copy intentional */ - inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; inode->i_ino = CRAMINO(cramfs_inode); - /* inode->i_nlink is left 1 - arguably wrong for directories, - but it's the best we can do without reading the directory - contents. 1 yields the right result in GNU find, even - without -noleaf option. */ - if (S_ISREG(inode->i_mode)) { - inode->i_fop = &generic_ro_fops; - inode->i_data.a_ops = &cramfs_aops; - } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &cramfs_dir_inode_operations; - inode->i_fop = &cramfs_directory_operations; - } else if (S_ISLNK(inode->i_mode)) { - inode->i_op = &page_symlink_inode_operations; - inode->i_data.a_ops = &cramfs_aops; - } else { - inode->i_size = 0; - inode->i_blocks = 0; - init_special_inode(inode, inode->i_mode, - old_decode_dev(cramfs_inode->size)); - } return 0; } @@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb, struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), cramfs_iget5_test, cramfs_iget5_set, cramfs_inode); + static struct timespec zerotime; + if (inode && (inode->i_state & I_NEW)) { + inode->i_mode = cramfs_inode->mode; + inode->i_uid = cramfs_inode->uid; + inode->i_size = cramfs_inode->size; + inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; + inode->i_gid = cramfs_inode->gid; + /* Struct copy intentional */ + inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; + /* inode->i_nlink is left 1 - arguably wrong for directories, + but it's the best we can do without reading the directory + contents. 1 yields the right result in GNU find, even + without -noleaf option. */ + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &generic_ro_fops; + inode->i_data.a_ops = &cramfs_aops; + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &cramfs_dir_inode_operations; + inode->i_fop = &cramfs_directory_operations; + } else if (S_ISLNK(inode->i_mode)) { + inode->i_op = &page_symlink_inode_operations; + inode->i_data.a_ops = &cramfs_aops; + } else { + inode->i_size = 0; + inode->i_blocks = 0; + init_special_inode(inode, inode->i_mode, + old_decode_dev(cramfs_inode->size)); + } unlock_new_inode(inode); } return inode; } +static void cramfs_drop_inode(struct inode *inode) +{ + if (inode->i_ino == 1) + generic_delete_inode(inode); + else + generic_drop_inode(inode); +} + /* * We have our own block cache: don't fill up the buffer cache * with the rom-image, because the way the filesystem is set @@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = { .put_super = cramfs_put_super, .remount_fs = cramfs_remount, .statfs = cramfs_statfs, + .drop_inode = cramfs_drop_inode, }; static int cramfs_get_sb(struct file_system_type *fs_type, diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..80e93956aced 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) * If no entry exists with the exact case name, allocate new dentry with * the exact case, and return the spliced entry. */ -struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, +struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { int error; diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c4e7d721bd8d..89d2fb7b991a 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -30,16 +30,16 @@ static struct config_group *space_list; static struct config_group *comm_list; -static struct comm *local_comm; +static struct dlm_comm *local_comm; -struct clusters; -struct cluster; -struct spaces; -struct space; -struct comms; -struct comm; -struct nodes; -struct node; +struct dlm_clusters; +struct dlm_cluster; +struct dlm_spaces; +struct dlm_space; +struct dlm_comms; +struct dlm_comm; +struct dlm_nodes; +struct dlm_node; static struct config_group *make_cluster(struct config_group *, const char *); static void drop_cluster(struct config_group *, struct config_item *); @@ -68,17 +68,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len); -static ssize_t comm_nodeid_read(struct comm *cm, char *buf); -static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); -static ssize_t comm_local_read(struct comm *cm, char *buf); -static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); -static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); -static ssize_t node_nodeid_read(struct node *nd, char *buf); -static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); -static ssize_t node_weight_read(struct node *nd, char *buf); -static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); - -struct cluster { +static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf); +static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t comm_local_read(struct dlm_comm *cm, char *buf); +static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); +static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, + size_t len); +static ssize_t node_weight_read(struct dlm_node *nd, char *buf); +static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, + size_t len); + +struct dlm_cluster { struct config_group group; unsigned int cl_tcp_port; unsigned int cl_buffer_size; @@ -109,11 +114,11 @@ enum { struct cluster_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct cluster *, char *); - ssize_t (*store)(struct cluster *, const char *, size_t); + ssize_t (*show)(struct dlm_cluster *, char *); + ssize_t (*store)(struct dlm_cluster *, const char *, size_t); }; -static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, +static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, int *info_field, int check_zero, const char *buf, size_t len) { @@ -134,12 +139,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, } #define CLUSTER_ATTR(name, check_zero) \ -static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ +static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \ { \ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ check_zero, buf, len); \ } \ -static ssize_t name##_read(struct cluster *cl, char *buf) \ +static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ } \ @@ -181,8 +186,8 @@ enum { struct comm_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct comm *, char *); - ssize_t (*store)(struct comm *, const char *, size_t); + ssize_t (*show)(struct dlm_comm *, char *); + ssize_t (*store)(struct dlm_comm *, const char *, size_t); }; static struct comm_attribute comm_attr_nodeid = { @@ -222,8 +227,8 @@ enum { struct node_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct node *, char *); - ssize_t (*store)(struct node *, const char *, size_t); + ssize_t (*show)(struct dlm_node *, char *); + ssize_t (*store)(struct dlm_node *, const char *, size_t); }; static struct node_attribute node_attr_nodeid = { @@ -248,26 +253,26 @@ static struct configfs_attribute *node_attrs[] = { NULL, }; -struct clusters { +struct dlm_clusters { struct configfs_subsystem subsys; }; -struct spaces { +struct dlm_spaces { struct config_group ss_group; }; -struct space { +struct dlm_space { struct config_group group; struct list_head members; struct mutex members_lock; int members_count; }; -struct comms { +struct dlm_comms { struct config_group cs_group; }; -struct comm { +struct dlm_comm { struct config_item item; int nodeid; int local; @@ -275,11 +280,11 @@ struct comm { struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; }; -struct nodes { +struct dlm_nodes { struct config_group ns_group; }; -struct node { +struct dlm_node { struct config_item item; struct list_head list; /* space->members */ int nodeid; @@ -372,38 +377,40 @@ static struct config_item_type node_type = { .ct_owner = THIS_MODULE, }; -static struct cluster *to_cluster(struct config_item *i) +static struct dlm_cluster *to_cluster(struct config_item *i) { - return i ? container_of(to_config_group(i), struct cluster, group):NULL; + return i ? container_of(to_config_group(i), struct dlm_cluster, group) : + NULL; } -static struct space *to_space(struct config_item *i) +static struct dlm_space *to_space(struct config_item *i) { - return i ? container_of(to_config_group(i), struct space, group) : NULL; + return i ? container_of(to_config_group(i), struct dlm_space, group) : + NULL; } -static struct comm *to_comm(struct config_item *i) +static struct dlm_comm *to_comm(struct config_item *i) { - return i ? container_of(i, struct comm, item) : NULL; + return i ? container_of(i, struct dlm_comm, item) : NULL; } -static struct node *to_node(struct config_item *i) +static struct dlm_node *to_node(struct config_item *i) { - return i ? container_of(i, struct node, item) : NULL; + return i ? container_of(i, struct dlm_node, item) : NULL; } static struct config_group *make_cluster(struct config_group *g, const char *name) { - struct cluster *cl = NULL; - struct spaces *sps = NULL; - struct comms *cms = NULL; + struct dlm_cluster *cl = NULL; + struct dlm_spaces *sps = NULL; + struct dlm_comms *cms = NULL; void *gps = NULL; - cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); + cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL); gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); - sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); - cms = kzalloc(sizeof(struct comms), GFP_KERNEL); + sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL); + cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL); if (!cl || !gps || !sps || !cms) goto fail; @@ -443,7 +450,7 @@ static struct config_group *make_cluster(struct config_group *g, static void drop_cluster(struct config_group *g, struct config_item *i) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct config_item *tmp; int j; @@ -461,20 +468,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i) static void release_cluster(struct config_item *i) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); kfree(cl->group.default_groups); kfree(cl); } static struct config_group *make_space(struct config_group *g, const char *name) { - struct space *sp = NULL; - struct nodes *nds = NULL; + struct dlm_space *sp = NULL; + struct dlm_nodes *nds = NULL; void *gps = NULL; - sp = kzalloc(sizeof(struct space), GFP_KERNEL); + sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL); gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); - nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); + nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL); if (!sp || !gps || !nds) goto fail; @@ -500,7 +507,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) static void drop_space(struct config_group *g, struct config_item *i) { - struct space *sp = to_space(i); + struct dlm_space *sp = to_space(i); struct config_item *tmp; int j; @@ -517,16 +524,16 @@ static void drop_space(struct config_group *g, struct config_item *i) static void release_space(struct config_item *i) { - struct space *sp = to_space(i); + struct dlm_space *sp = to_space(i); kfree(sp->group.default_groups); kfree(sp); } static struct config_item *make_comm(struct config_group *g, const char *name) { - struct comm *cm; + struct dlm_comm *cm; - cm = kzalloc(sizeof(struct comm), GFP_KERNEL); + cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL); if (!cm) return ERR_PTR(-ENOMEM); @@ -539,7 +546,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) static void drop_comm(struct config_group *g, struct config_item *i) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); if (local_comm == cm) local_comm = NULL; dlm_lowcomms_close(cm->nodeid); @@ -550,16 +557,16 @@ static void drop_comm(struct config_group *g, struct config_item *i) static void release_comm(struct config_item *i) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); kfree(cm); } static struct config_item *make_node(struct config_group *g, const char *name) { - struct space *sp = to_space(g->cg_item.ci_parent); - struct node *nd; + struct dlm_space *sp = to_space(g->cg_item.ci_parent); + struct dlm_node *nd; - nd = kzalloc(sizeof(struct node), GFP_KERNEL); + nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); if (!nd) return ERR_PTR(-ENOMEM); @@ -578,8 +585,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) static void drop_node(struct config_group *g, struct config_item *i) { - struct space *sp = to_space(g->cg_item.ci_parent); - struct node *nd = to_node(i); + struct dlm_space *sp = to_space(g->cg_item.ci_parent); + struct dlm_node *nd = to_node(i); mutex_lock(&sp->members_lock); list_del(&nd->list); @@ -591,11 +598,11 @@ static void drop_node(struct config_group *g, struct config_item *i) static void release_node(struct config_item *i) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); kfree(nd); } -static struct clusters clusters_root = { +static struct dlm_clusters clusters_root = { .subsys = { .su_group = { .cg_item = { @@ -625,7 +632,7 @@ void dlm_config_exit(void) static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct cluster_attribute *cla = container_of(a, struct cluster_attribute, attr); return cla->show ? cla->show(cl, buf) : 0; @@ -635,7 +642,7 @@ static ssize_t store_cluster(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct cluster_attribute *cla = container_of(a, struct cluster_attribute, attr); return cla->store ? cla->store(cl, buf, len) : -EINVAL; @@ -644,7 +651,7 @@ static ssize_t store_cluster(struct config_item *i, static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); struct comm_attribute *cma = container_of(a, struct comm_attribute, attr); return cma->show ? cma->show(cm, buf) : 0; @@ -653,29 +660,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); struct comm_attribute *cma = container_of(a, struct comm_attribute, attr); return cma->store ? cma->store(cm, buf, len) : -EINVAL; } -static ssize_t comm_nodeid_read(struct comm *cm, char *buf) +static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) { return sprintf(buf, "%d\n", cm->nodeid); } -static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, + size_t len) { cm->nodeid = simple_strtol(buf, NULL, 0); return len; } -static ssize_t comm_local_read(struct comm *cm, char *buf) +static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) { return sprintf(buf, "%d\n", cm->local); } -static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, + size_t len) { cm->local= simple_strtol(buf, NULL, 0); if (cm->local && !local_comm) @@ -683,7 +692,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) { struct sockaddr_storage *addr; @@ -705,7 +714,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); struct node_attribute *nda = container_of(a, struct node_attribute, attr); return nda->show ? nda->show(nd, buf) : 0; @@ -714,29 +723,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); struct node_attribute *nda = container_of(a, struct node_attribute, attr); return nda->store ? nda->store(nd, buf, len) : -EINVAL; } -static ssize_t node_nodeid_read(struct node *nd, char *buf) +static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) { return sprintf(buf, "%d\n", nd->nodeid); } -static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) +static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, + size_t len) { nd->nodeid = simple_strtol(buf, NULL, 0); return len; } -static ssize_t node_weight_read(struct node *nd, char *buf) +static ssize_t node_weight_read(struct dlm_node *nd, char *buf) { return sprintf(buf, "%d\n", nd->weight); } -static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) +static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, + size_t len) { nd->weight = simple_strtol(buf, NULL, 0); return len; @@ -746,7 +757,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) * Functions for the dlm to get the info that's been configured */ -static struct space *get_space(char *name) +static struct dlm_space *get_space(char *name) { struct config_item *i; @@ -760,15 +771,15 @@ static struct space *get_space(char *name) return to_space(i); } -static void put_space(struct space *sp) +static void put_space(struct dlm_space *sp) { config_item_put(&sp->group.cg_item); } -static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) +static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) { struct config_item *i; - struct comm *cm = NULL; + struct dlm_comm *cm = NULL; int found = 0; if (!comm_list) @@ -801,7 +812,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) return cm; } -static void put_comm(struct comm *cm) +static void put_comm(struct dlm_comm *cm) { config_item_put(&cm->item); } @@ -810,8 +821,8 @@ static void put_comm(struct comm *cm) int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, int **new_out, int *new_count_out) { - struct space *sp; - struct node *nd; + struct dlm_space *sp; + struct dlm_node *nd; int i = 0, rv = 0, ids_count = 0, new_count = 0; int *ids, *new; @@ -874,8 +885,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, int dlm_node_weight(char *lsname, int nodeid) { - struct space *sp; - struct node *nd; + struct dlm_space *sp; + struct dlm_node *nd; int w = -EEXIST; sp = get_space(lsname); @@ -897,7 +908,7 @@ int dlm_node_weight(char *lsname, int nodeid) int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) { - struct comm *cm = get_comm(nodeid, NULL); + struct dlm_comm *cm = get_comm(nodeid, NULL); if (!cm) return -EEXIST; if (!cm->addr_count) @@ -909,7 +920,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) { - struct comm *cm = get_comm(0, addr); + struct dlm_comm *cm = get_comm(0, addr); if (!cm) return -EEXIST; *nodeid = cm->nodeid; diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 929e48ae7591..34f14a14fb4e 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -527,8 +527,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, k32buf = (struct dlm_write_request32 *)kbuf; kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL); - if (!kbuf) + if (!kbuf) { + kfree(k32buf); return -ENOMEM; + } if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); @@ -539,8 +541,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, /* do we really need this? can a write happen after a close? */ if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && - (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) - return -EINVAL; + (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) { + error = -EINVAL; + goto out_free; + } sigfillset(&allsigs); sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei } unlock_kernel(); - d_add(dentry, inode); - return NULL; + return d_splice_alias(inode, dentry); } static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0c87474f7917..7cc0eb756b55 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1041,10 +1041,7 @@ retry: } /* - * It opens an eventpoll file descriptor. The "size" parameter is there - * for historical reasons, when epoll was using an hash instead of an - * RB tree. With the current implementation, the "size" parameter is ignored - * (besides sanity checks). + * Open an eventpoll file descriptor. */ asmlinkage long sys_epoll_create1(int flags) { diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index c7d04e165446..694ed6fadcc8 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size) acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); - for (n=0; n < count; n++) { + for (n = 0; n < count; n++) { ext4_acl_entry *entry = (ext4_acl_entry *)value; if ((char *)value + sizeof(ext4_acl_entry_short) > end) goto fail; acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(ext4_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - value = (char *)value + sizeof(ext4_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); - break; - - default: + + switch (acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext4_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext4_acl_entry); + if ((char *)value > end) goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; } } if (value != end) @@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); e = (char *)ext_acl + sizeof(ext4_acl_header); - for (n=0; n < acl->a_count; n++) { + for (n = 0; n < acl->a_count; n++) { ext4_acl_entry *entry = (ext4_acl_entry *)e; entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(ext4_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(ext4_acl_entry_short); - break; - - default: - goto fail; + switch (acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext4_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext4_acl_entry_short); + break; + + default: + goto fail; } } return (char *)ext_acl; @@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext4_iget_acl(inode, &ei->i_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext4_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + switch (type) { + case ACL_TYPE_ACCESS: + acl = ext4_iget_acl(inode, &ei->i_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext4_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type) kfree(value); if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; + switch (type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; } } return acl; @@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - switch(type) { - case ACL_TYPE_ACCESS: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - if (error < 0) - return error; - else { - inode->i_mode = mode; - ext4_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + ext4_mark_inode_dirty(handle, inode); + if (error == 0) + acl = NULL; } - break; + } + break; - case ACL_TYPE_DEFAULT: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - break; + case ACL_TYPE_DEFAULT: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; - default: - return -EINVAL; + default: + return -EINVAL; } if (acl) { value = ext4_acl_to_disk(acl, &size); @@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, kfree(value); if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; + switch (type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; } } return error; diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 495ab21b9832..e9fa960ba6da 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) if (unlikely(!bh)) { ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); + "block_group = %lu, block_bitmap = %llu", + block_group, bitmap_blk); return NULL; } if (bh_uptodate_or_lock(bh)) return bh; + spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); set_buffer_uptodate(bh); unlock_buffer(bh); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); return bh; } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (bh_submit_read(bh) < 0) { put_bh(bh); ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); + "block_group = %lu, block_bitmap = %llu", + block_group, bitmap_blk); return NULL; } ext4_valid_block_bitmap(sb, desc, block_group, bh); @@ -1623,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, free_blocks = percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); #endif + if (free_blocks <= root_blocks) + /* we don't have free space */ + return 0; if (free_blocks - root_blocks < nblocks) return free_blocks - root_blocks; return nblocks; diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..ec8e33b45219 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent, get_dtype(sb, fname->file_type)); if (error) { filp->f_pos = curr_pos; - info->extra_fname = fname->next; + info->extra_fname = fname; return error; } fname = fname->next; @@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp, * If there are any leftover names on the hash collision * chain, return them first. */ - if (info->extra_fname && - call_filldir(filp, dirent, filldir, info->extra_fname)) - goto finished; + if (info->extra_fname) { + if (call_filldir(filp, dirent, filldir, info->extra_fname)) + goto finished; - if (!info->curr_node) + info->extra_fname = NULL; + info->curr_node = rb_next(info->curr_node); + if (!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = EXT4_HTREE_EOF; + goto finished; + } + info->curr_hash = info->next_hash; + info->curr_minor_hash = 0; + } + } else if (!info->curr_node) info->curr_node = rb_first(&info->root); while (1) { diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 303e41cf7b14..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp, /* inode.c */ -void ext4_da_release_space(struct inode *inode, int used, int to_free); int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *ext4_getblk(handle_t *, struct inode *, @@ -1073,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); +extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); @@ -1228,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; /* extents.c */ extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, + int chunk); extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..d33dc56d6986 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h @@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); extern int ext4_extent_tree_init(handle_t *, struct inode *); -extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, + int num, + struct ext4_ext_path *path); extern int ext4_ext_try_to_merge(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *); diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -51,6 +51,14 @@ EXT4_XATTR_TRANS_BLOCKS - 2 + \ 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) +/* + * Define the number of metadata blocks we need to account to modify data. + * + * This include super block, inode block, quota blocks and xattr blocks + */ +#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be * generous. We can grow the delete transaction later if necessary. */ diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 42c4c0c892ed..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) if (handle->h_buffer_credits > needed) return 0; err = ext4_journal_extend(handle, needed); - if (err) + if (err <= 0) return err; return ext4_journal_restart(handle, needed); } @@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, /* * get the next allocated block if the extent in the path - * is before the requested block(s) + * is before the requested block(s) */ if (b2 < b1) { b2 = ext4_ext_next_allocated_block(path); @@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, } /* - * ext4_ext_calc_credits_for_insert: - * This routine returns max. credits that the extent tree can consume. - * It should be OK for low-performance paths like ->writepage() - * To allow many writing processes to fit into a single transaction, - * the caller should calculate credits under i_data_sem and - * pass the actual path. + * ext4_ext_calc_credits_for_single_extent: + * This routine returns max. credits that needed to insert an extent + * to the extent tree. + * When pass the actual path, the caller should calculate credits + * under i_data_sem. */ -int ext4_ext_calc_credits_for_insert(struct inode *inode, +int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, struct ext4_ext_path *path) { - int depth, needed; - if (path) { + int depth = ext_depth(inode); + int ret = 0; + /* probably there is space in leaf? */ - depth = ext_depth(inode); if (le16_to_cpu(path[depth].p_hdr->eh_entries) - < le16_to_cpu(path[depth].p_hdr->eh_max)) - return 1; - } - - /* - * given 32-bit logical block (4294967296 blocks), max. tree - * can be 4 levels in depth -- 4 * 340^4 == 53453440000. - * Let's also add one more level for imbalance. - */ - depth = 5; + < le16_to_cpu(path[depth].p_hdr->eh_max)) { - /* allocation of new data block(s) */ - needed = 2; + /* + * There are some space in the leaf tree, no + * need to account for leaf block credit + * + * bitmaps and block group descriptor blocks + * and other metadat blocks still need to be + * accounted. + */ + /* 1 bitmap, 1 block group descriptor */ + ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); + } + } - /* - * tree can be full, so it would need to grow in depth: - * we need one credit to modify old root, credits for - * new root will be added in split accounting - */ - needed += 1; + return ext4_chunk_trans_blocks(inode, nrblocks); +} - /* - * Index split can happen, we would need: - * allocate intermediate indexes (bitmap + group) - * + change two blocks at each level, but root (already included) - */ - needed += (depth * 2) + (depth * 2); +/* + * How many index/leaf blocks need to change/allocate to modify nrblocks? + * + * if nrblocks are fit in a single extent (chunk flag is 1), then + * in the worse case, each tree level index/leaf need to be changed + * if the tree split due to insert a new extent, then the old tree + * index/leaf need to be updated too + * + * If the nrblocks are discontiguous, they could cause + * the whole tree split more than once, but this is really rare. + */ +int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + int index; + int depth = ext_depth(inode); - /* any allocation modifies superblock */ - needed += 1; + if (chunk) + index = depth * 2; + else + index = depth * 3; - return needed; + return index; } static int ext4_remove_blocks(handle_t *handle, struct inode *inode, @@ -1910,16 +1917,18 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, BUG_ON(b != ex_ee_block + ex_ee_len - 1); } - /* at present, extent can't cross block group: */ - /* leaf + bitmap + group desc + sb + inode */ - credits = 5; + /* + * 3 for leaf, sb, and inode plus 2 (bmap and group + * descriptor) for each block group; assume two block + * groups plus ex_ee_len/blocks_per_block_group for + * the worst case + */ + credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); if (ex == EXT_FIRST_EXTENT(eh)) { correct_index = 1; credits += (ext_depth(inode)) + 1; } -#ifdef CONFIG_QUOTA credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif err = ext4_ext_journal_restart(handle, credits); if (err) @@ -2323,7 +2332,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ if (allocated <= EXT4_EXT_ZERO_LEN) { - /* Mark first half uninitialized. + /* + * iblock == ee_block is handled by the zerouout + * at the beginning. + * Mark first half uninitialized. * Mark second half initialized and zero out the * initialized extent */ @@ -2346,7 +2358,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ + /* blocks available from iblock */ return allocated; } else if (err) @@ -2374,6 +2386,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = PTR_ERR(path); return err; } + /* get the second half extent details */ ex = path[depth].p_ext; err = ext4_ext_get_access(handle, inode, path + depth); @@ -2403,6 +2416,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ + /* blocks available from iblock */ return allocated; } else if (err) @@ -2418,23 +2432,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ orig_ex.ee_len = cpu_to_le16(ee_len - ext4_ext_get_actual_len(ex3)); - if (newdepth != depth) { - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; + depth = newdepth; + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, iblock, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; } + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + if (ex2 != &newex) + ex2 = ex; + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + allocated = max_blocks; /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying @@ -2452,6 +2465,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zero out the first half */ + /* blocks available from iblock */ return allocated; } } @@ -2796,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) /* * probably first extent we're gonna free will be last in block */ - err = ext4_writepage_trans_blocks(inode) + 3; + err = ext4_writepage_trans_blocks(inode); handle = ext4_journal_start(inode, err); if (IS_ERR(handle)) return; @@ -2810,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode) down_write(&EXT4_I(inode)->i_data_sem); ext4_ext_invalidate_cache(inode); - ext4_mb_discard_inode_preallocations(inode); + ext4_discard_reservation(inode); /* * TODO: optimization is possible here. @@ -2849,27 +2863,6 @@ out_stop: ext4_journal_stop(handle); } -/* - * ext4_ext_writepage_trans_blocks: - * calculate max number of blocks we could modify - * in order to allocate new block for an inode - */ -int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) -{ - int needed; - - needed = ext4_ext_calc_credits_for_insert(inode, NULL); - - /* caller wants to allocate num blocks, but note it includes sb */ - needed = needed * num - (num - 1); - -#ifdef CONFIG_QUOTA - needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - - return needed; -} - static void ext4_falloc_update_inode(struct inode *inode, int mode, loff_t new_size, int update_ctime) { @@ -2930,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - block; /* - * credits to insert 1 extent into extent tree + buffers to be able to - * modify 1 super block, 1 block bitmap and 1 group descriptor. + * credits to insert 1 extent into extent tree */ - credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; + credits = ext4_chunk_trans_blocks(inode, max_blocks); mutex_lock(&inode->i_mutex); retry: while (ret >= 0 && ret < max_blocks) { diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index a92eb305344f..f344834bbf58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, * Return buffer_head of bitmap on success or NULL. */ static struct buffer_head * -read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) +ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; struct buffer_head *bh = NULL; + ext4_fsblk_t bitmap_blk; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); - if (!buffer_uptodate(bh)) { - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - ext4_init_inode_bitmap(sb, bh, block_group, - desc); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - } - } else { - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + return NULL; + bitmap_blk = ext4_inode_bitmap(sb, desc); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext4_error(sb, __func__, + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %llu", + block_group, bitmap_blk); + return NULL; } - if (!bh) - ext4_error(sb, "read_inode_bitmap", + if (bh_uptodate_or_lock(bh)) + return bh; + + spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + ext4_init_inode_bitmap(sb, bh, block_group, desc); + set_buffer_uptodate(bh); + unlock_buffer(bh); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + return bh; + } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (bh_submit_read(bh) < 0) { + put_bh(bh); + ext4_error(sb, __func__, "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %llu", - block_group, ext4_inode_bitmap(sb, desc)); -error_out: + block_group, bitmap_blk); + return NULL; + } return bh; } @@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); + bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; @@ -341,7 +351,7 @@ find_close_to_parent: goto found_flexbg; } - if (best_flex < 0 || + if (flex_group[best_flex].free_inodes == 0 || (flex_group[i].free_blocks > flex_group[best_flex].free_blocks && flex_group[i].free_inodes)) @@ -623,7 +633,7 @@ got_group: goto fail; brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, group); + bitmap_bh = ext4_read_inode_bitmap(sb, group); if (!bitmap_bh) goto fail; @@ -728,7 +738,7 @@ got: /* When marking the block group with * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unsed even though + * on the value of bg_itable_unused even though * mke2fs could have initialized the same for us. * Instead we calculated the value below */ @@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); + bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) { ext4_warning(sb, __func__, "inode bitmap error for orphan %lu", ino); @@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, i); + bitmap_bh = ext4_read_inode_bitmap(sb, i); if (!bitmap_bh) continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9843b046c235..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -41,6 +41,8 @@ #include "acl.h" #include "ext4_extents.h" +#define MPAGE_DA_EXTENT_TAIL 0x01 + static inline int ext4_begin_ordered_truncate(struct inode *inode, loff_t new_size) { @@ -191,6 +193,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) void ext4_delete_inode (struct inode * inode) { handle_t *handle; + int err; if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); @@ -199,8 +202,9 @@ void ext4_delete_inode (struct inode * inode) if (is_bad_inode(inode)) goto no_delete; - handle = start_transaction(inode); + handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { + ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* * If we're going to skip the normal cleanup, we still need to * make sure that the in-core orphan linked list is properly @@ -213,8 +217,34 @@ void ext4_delete_inode (struct inode * inode) if (IS_SYNC(inode)) handle->h_sync = 1; inode->i_size = 0; + err = ext4_mark_inode_dirty(handle, inode); + if (err) { + ext4_warning(inode->i_sb, __func__, + "couldn't mark inode dirty (err %d)", err); + goto stop_handle; + } if (inode->i_blocks) ext4_truncate(inode); + + /* + * ext4_ext_truncate() doesn't reserve any slop when it + * restarts journal transactions; therefore there may not be + * enough credits left in the handle to remove the inode from + * the orphan list and set the dtime field. + */ + if (handle->h_buffer_credits < 3) { + err = ext4_journal_extend(handle, 3); + if (err > 0) + err = ext4_journal_restart(handle, 3); + if (err != 0) { + ext4_warning(inode->i_sb, __func__, + "couldn't extend journal (err %d)", err); + stop_handle: + ext4_journal_stop(handle); + goto no_delete; + } + } + /* * Kill off the orphan record which ext4_truncate created. * AKPM: I think this can be inside the above `if'. @@ -952,23 +982,74 @@ out: return err; } -/* Maximum number of blocks we map for direct IO at once. */ -#define DIO_MAX_BLOCKS 4096 /* - * Number of credits we need for writing DIO_MAX_BLOCKS: - * We need sb + group descriptor + bitmap + inode -> 4 - * For B blocks with A block pointers per block we need: - * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). - * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. + * Calculate the number of metadata blocks need to reserve + * to allocate @blocks for non extent file based file */ -#define DIO_CREDITS 25 +static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +{ + int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ind_blks, dind_blks, tind_blks; + + /* number of new indirect blocks needed */ + ind_blks = (blocks + icap - 1) / icap; + + dind_blks = (ind_blks + icap - 1) / icap; + tind_blks = 1; + + return ind_blks + dind_blks + tind_blks; +} /* + * Calculate the number of metadata blocks need to reserve + * to allocate given number of blocks + */ +static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +{ + if (!blocks) + return 0; + + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_calc_metadata_amount(inode, blocks); + + return ext4_indirect_calc_metadata_amount(inode, blocks); +} + +static void ext4_da_update_reserve_space(struct inode *inode, int used) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + int total, mdb, mdb_free; + + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + /* recalculate the number of metablocks still need to be reserved */ + total = EXT4_I(inode)->i_reserved_data_blocks - used; + mdb = ext4_calc_metadata_amount(inode, total); + + /* figure out how many metablocks to release */ + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; + + /* Account for allocated meta_blocks */ + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + + /* update fs free blocks counter for truncate case */ + percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); + + /* update per-inode reservations */ + BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= used; + + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + EXT4_I(inode)->i_allocated_meta_blocks = 0; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); +} + +/* + * The ext4_get_blocks_wrap() function try to look up the requested blocks, + * and returns if the blocks are already mapped. * - * - * ext4_ext4 get_block() wrapper function - * It will do a look up first, and returns if the blocks already mapped. * Otherwise it takes the write lock of the i_data_sem and allocate blocks * and store the allocated blocks in the result buffer head and mark it * mapped. @@ -1069,26 +1150,30 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, * which were deferred till now */ if ((retval > 0) && buffer_delay(bh)) - ext4_da_release_space(inode, retval, 0); + ext4_da_update_reserve_space(inode, retval); } up_write((&EXT4_I(inode)->i_data_sem)); return retval; } +/* Maximum number of blocks we map for direct IO at once. */ +#define DIO_MAX_BLOCKS 4096 + static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = ext4_journal_current_handle(); int ret = 0, started = 0; unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + int dio_credits; if (create && !handle) { /* Direct IO write... */ if (max_blocks > DIO_MAX_BLOCKS) max_blocks = DIO_MAX_BLOCKS; - handle = ext4_journal_start(inode, DIO_CREDITS + - 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); + dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); + handle = ext4_journal_start(inode, dio_credits); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; @@ -1336,12 +1421,8 @@ static int ext4_ordered_write_end(struct file *file, { handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; - unsigned from, to; int ret = 0, ret2; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; - ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { @@ -1437,36 +1518,6 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } -/* - * Calculate the number of metadata blocks need to reserve - * to allocate @blocks for non extent file based file - */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) -{ - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ind_blks, dind_blks, tind_blks; - - /* number of new indirect blocks needed */ - ind_blks = (blocks + icap - 1) / icap; - - dind_blks = (ind_blks + icap - 1) / icap; - - tind_blks = 1; - - return ind_blks + dind_blks + tind_blks; -} - -/* - * Calculate the number of metadata blocks need to reserve - * to allocate given number of blocks - */ -static int ext4_calc_metadata_amount(struct inode *inode, int blocks) -{ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_calc_metadata_amount(inode, blocks); - - return ext4_indirect_calc_metadata_amount(inode, blocks); -} static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { @@ -1490,7 +1541,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return -ENOSPC; } - /* reduce fs free blocks counter */ percpu_counter_sub(&sbi->s_freeblocks_counter, total); @@ -1501,35 +1551,49 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) return 0; /* success */ } -void ext4_da_release_space(struct inode *inode, int used, int to_free) +static void ext4_da_release_space(struct inode *inode, int to_free) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int total, mdb, mdb_free, release; + if (!to_free) + return; /* Nothing to release, exit */ + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + + if (!EXT4_I(inode)->i_reserved_data_blocks) { + /* + * if there is no reserved blocks, but we try to free some + * then the counter is messed up somewhere. + * but since this function is called from invalidate + * page, it's harmless to return without any action + */ + printk(KERN_INFO "ext4 delalloc try to release %d reserved " + "blocks for inode %lu, but there is no reserved " + "data blocks\n", to_free, inode->i_ino); + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + return; + } + /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; + total = EXT4_I(inode)->i_reserved_data_blocks - to_free; mdb = ext4_calc_metadata_amount(inode, total); /* figure out how many metablocks to release */ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; - release = to_free + mdb_free; /* update fs free blocks counter for truncate case */ percpu_counter_add(&sbi->s_freeblocks_counter, release); /* update per-inode reservations */ - BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); + BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= to_free; BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); EXT4_I(inode)->i_reserved_meta_blocks = mdb; - EXT4_I(inode)->i_allocated_meta_blocks = 0; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } @@ -1551,7 +1615,7 @@ static void ext4_da_page_release_reservation(struct page *page, } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - ext4_da_release_space(page->mapping->host, 0, to_release); + ext4_da_release_space(page->mapping->host, to_release); } /* @@ -1564,11 +1628,13 @@ struct mpage_da_data { unsigned long first_page, next_page; /* extent of pages */ get_block_t *get_block; struct writeback_control *wbc; + int io_done; + long pages_written; }; /* * mpage_da_submit_io - walks through extent of pages and try to write - * them with __mpage_writepage() + * them with writepage() call back * * @mpd->inode: inode * @mpd->first_page: first page of the extent @@ -1583,18 +1649,11 @@ struct mpage_da_data { static int mpage_da_submit_io(struct mpage_da_data *mpd) { struct address_space *mapping = mpd->inode->i_mapping; - struct mpage_data mpd_pp = { - .bio = NULL, - .last_block_in_bio = 0, - .get_block = mpd->get_block, - .use_writepage = 1, - }; int ret = 0, err, nr_pages, i; unsigned long index, end; struct pagevec pvec; BUG_ON(mpd->next_page <= mpd->first_page); - pagevec_init(&pvec, 0); index = mpd->first_page; end = mpd->next_page - 1; @@ -1612,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) break; index++; - err = __mpage_writepage(page, mpd->wbc, &mpd_pp); - + err = mapping->a_ops->writepage(page, mpd->wbc); + if (!err) + mpd->pages_written++; /* * In error case, we have to continue because * remaining pages are still locked @@ -1624,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) } pagevec_release(&pvec); } - if (mpd_pp.bio) - mpage_bio_submit(WRITE, mpd_pp.bio); - return ret; } @@ -1649,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, int blocks = exbh->b_size >> inode->i_blkbits; sector_t pblock = exbh->b_blocknr, cur_logical; struct buffer_head *head, *bh; - unsigned long index, end; + pgoff_t index, end; struct pagevec pvec; int nr_pages, i; @@ -1692,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, if (buffer_delay(bh)) { bh->b_blocknr = pblock; clear_buffer_delay(bh); + bh->b_bdev = inode->i_sb->s_bdev; + } else if (buffer_unwritten(bh)) { + bh->b_blocknr = pblock; + clear_buffer_unwritten(bh); + set_buffer_mapped(bh); + set_buffer_new(bh); + bh->b_bdev = inode->i_sb->s_bdev; } else if (buffer_mapped(bh)) BUG_ON(bh->b_blocknr != pblock); @@ -1727,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode, * * The function skips space we know is already mapped to disk blocks. * - * The function ignores errors ->get_block() returns, thus real - * error handling is postponed to __mpage_writepage() */ static void mpage_da_map_blocks(struct mpage_da_data *mpd) { + int err = 0; struct buffer_head *lbh = &mpd->lbh; - int err = 0, remain = lbh->b_size; sector_t next = lbh->b_blocknr; struct buffer_head new; @@ -1743,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) if (buffer_mapped(lbh) && !buffer_delay(lbh)) return; - while (remain) { - new.b_state = lbh->b_state; - new.b_blocknr = 0; - new.b_size = remain; - err = mpd->get_block(mpd->inode, next, &new, 1); - if (err) { - /* - * Rather than implement own error handling - * here, we just leave remaining blocks - * unallocated and try again with ->writepage() - */ - break; - } - BUG_ON(new.b_size == 0); + new.b_state = lbh->b_state; + new.b_blocknr = 0; + new.b_size = lbh->b_size; - if (buffer_new(&new)) - __unmap_underlying_blocks(mpd->inode, &new); + /* + * If we didn't accumulate anything + * to write simply return + */ + if (!new.b_size) + return; + err = mpd->get_block(mpd->inode, next, &new, 1); + if (err) + return; + BUG_ON(new.b_size == 0); - /* - * If blocks are delayed marked, we need to - * put actual blocknr and drop delayed bit - */ - if (buffer_delay(lbh)) - mpage_put_bnr_to_bhs(mpd, next, &new); + if (buffer_new(&new)) + __unmap_underlying_blocks(mpd->inode, &new); - /* go for the remaining blocks */ - next += new.b_size >> mpd->inode->i_blkbits; - remain -= new.b_size; - } + /* + * If blocks are delayed marked, we need to + * put actual blocknr and drop delayed bit + */ + if (buffer_delay(lbh) || buffer_unwritten(lbh)) + mpage_put_bnr_to_bhs(mpd, next, &new); + + return; } -#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) +#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ + (1 << BH_Delay) | (1 << BH_Unwritten)) /* * mpage_add_bh_to_extent - try to add one more block to extent of blocks @@ -1788,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, struct buffer_head *bh) { - struct buffer_head *lbh = &mpd->lbh; sector_t next; + size_t b_size = bh->b_size; + struct buffer_head *lbh = &mpd->lbh; + int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; - next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); - + /* check if thereserved journal credits might overflow */ + if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { + if (nrblocks >= EXT4_MAX_TRANS_DATA) { + /* + * With non-extent format we are limited by the journal + * credit available. Total credit needed to insert + * nrblocks contiguous blocks is dependent on the + * nrblocks. So limit nrblocks. + */ + goto flush_it; + } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > + EXT4_MAX_TRANS_DATA) { + /* + * Adding the new buffer_head would make it cross the + * allowed limit for which we have journal credit + * reserved. So limit the new bh->b_size + */ + b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << + mpd->inode->i_blkbits; + /* we will do mpage_da_submit_io in the next loop */ + } + } /* * First block in the extent */ if (lbh->b_size == 0) { lbh->b_blocknr = logical; - lbh->b_size = bh->b_size; + lbh->b_size = b_size; lbh->b_state = bh->b_state & BH_FLAGS; return; } + next = lbh->b_blocknr + nrblocks; /* * Can we merge the block to our big extent? */ if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { - lbh->b_size += bh->b_size; + lbh->b_size += b_size; return; } +flush_it: /* * We couldn't merge the block to our extent, so we * need to flush current extent and start new one */ mpage_da_map_blocks(mpd); - - /* - * Now start a new extent - */ - lbh->b_size = bh->b_size; - lbh->b_state = bh->b_state & BH_FLAGS; - lbh->b_blocknr = logical; + mpage_da_submit_io(mpd); + mpd->io_done = 1; + return; } /* @@ -1842,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page, struct buffer_head *bh, *head, fake; sector_t logical; + if (mpd->io_done) { + /* + * Rest of the page in the page_vec + * redirty then and skip then. We will + * try to to write them again after + * starting a new transaction + */ + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return MPAGE_DA_EXTENT_TAIL; + } /* * Can we merge this page to current extent? */ if (mpd->next_page != page->index) { /* * Nope, we can't. So, we map non-allocated blocks - * and start IO on them using __mpage_writepage() + * and start IO on them using writepage() */ if (mpd->next_page != mpd->first_page) { mpage_da_map_blocks(mpd); mpage_da_submit_io(mpd); + /* + * skip rest of the page in the page_vec + */ + mpd->io_done = 1; + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return MPAGE_DA_EXTENT_TAIL; } /* @@ -1883,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page, set_buffer_dirty(bh); set_buffer_uptodate(bh); mpage_add_bh_to_extent(mpd, logical, bh); + if (mpd->io_done) + return MPAGE_DA_EXTENT_TAIL; } else { /* * Page with regular buffer heads, just add all dirty ones @@ -1891,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page, bh = head; do { BUG_ON(buffer_locked(bh)); - if (buffer_dirty(bh)) + if (buffer_dirty(bh) && + (!buffer_mapped(bh) || buffer_delay(bh))) { mpage_add_bh_to_extent(mpd, logical, bh); + if (mpd->io_done) + return MPAGE_DA_EXTENT_TAIL; + } logical++; } while ((bh = bh->b_this_page) != head); } @@ -1911,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page, * * This is a library function, which implements the writepages() * address_space_operation. - * - * In order to avoid duplication of logic that deals with partial pages, - * multiple bio per page, etc, we find non-allocated blocks, allocate - * them with minimal calls to ->get_block() and re-use __mpage_writepage() - * - * It's important that we call __mpage_writepage() only once for each - * involved page, otherwise we'd have to implement more complicated logic - * to deal with pages w/o PG_lock or w/ PG_writeback and so on. - * - * See comments to mpage_writepages() */ static int mpage_da_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) { struct mpage_da_data mpd; + long to_write; int ret; if (!get_block) @@ -1940,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping, mpd.first_page = 0; mpd.next_page = 0; mpd.get_block = get_block; + mpd.io_done = 0; + mpd.pages_written = 0; + + to_write = wbc->nr_to_write; ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); /* * Handle last extent of pages */ - if (mpd.next_page != mpd.first_page) { + if (!mpd.io_done && mpd.next_page != mpd.first_page) { mpage_da_map_blocks(&mpd); mpage_da_submit_io(&mpd); } + wbc->nr_to_write = to_write - mpd.pages_written; return ret; } @@ -2155,63 +2255,95 @@ static int ext4_da_writepage(struct page *page, } /* - * For now just follow the DIO way to estimate the max credits - * needed to write out EXT4_MAX_WRITEBACK_PAGES. - * todo: need to calculate the max credits need for - * extent based files, currently the DIO credits is based on - * indirect-blocks mapping way. - * - * Probably should have a generic way to calculate credits - * for DIO, writepages, and truncate + * This is called via ext4_da_writepages() to + * calulate the total number of credits to reserve to fit + * a single extent allocation into a single transaction, + * ext4_da_writpeages() will loop calling this before + * the block allocation. */ -#define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS -#define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS + +static int ext4_da_writepages_trans_blocks(struct inode *inode) +{ + int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; + + /* + * With non-extent format the journal credit needed to + * insert nrblocks contiguous block is dependent on + * number of contiguous block. So we will limit + * number of contiguous block to a sane value + */ + if (!(inode->i_flags & EXT4_EXTENTS_FL) && + (max_blocks > EXT4_MAX_TRANS_DATA)) + max_blocks = EXT4_MAX_TRANS_DATA; + + return ext4_chunk_trans_blocks(inode, max_blocks); +} static int ext4_da_writepages(struct address_space *mapping, - struct writeback_control *wbc) + struct writeback_control *wbc) { - struct inode *inode = mapping->host; handle_t *handle = NULL; - int needed_blocks; - int ret = 0; - long to_write; loff_t range_start = 0; + struct inode *inode = mapping->host; + int needed_blocks, ret = 0, nr_to_writebump = 0; + long to_write, pages_skipped = 0; + struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); /* * No pages to write? This is mainly a kludge to avoid starting * a transaction for special inodes like journal inode on last iput() * because that could violate lock ordering on umount */ - if (!mapping->nrpages) + if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) return 0; - /* - * Estimate the worse case needed credits to write out - * EXT4_MAX_BUF_BLOCKS pages + * Make sure nr_to_write is >= sbi->s_mb_stream_request + * This make sure small files blocks are allocated in + * single attempt. This ensure that small files + * get less fragmented. */ - needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; + if (wbc->nr_to_write < sbi->s_mb_stream_request) { + nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; + wbc->nr_to_write = sbi->s_mb_stream_request; + } - to_write = wbc->nr_to_write; - if (!wbc->range_cyclic) { + if (!wbc->range_cyclic) /* * If range_cyclic is not set force range_cont * and save the old writeback_index */ wbc->range_cont = 1; - range_start = wbc->range_start; - } - while (!ret && to_write) { + range_start = wbc->range_start; + pages_skipped = wbc->pages_skipped; + +restart_loop: + to_write = wbc->nr_to_write; + while (!ret && to_write > 0) { + + /* + * we insert one extent at a time. So we need + * credit needed for single extent allocation. + * journalled mode is currently not supported + * by delalloc + */ + BUG_ON(ext4_should_journal_data(inode)); + needed_blocks = ext4_da_writepages_trans_blocks(inode); + /* start a new transaction*/ handle = ext4_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); + printk(KERN_EMERG "%s: jbd2_start: " + "%ld pages, ino %lu; err %d\n", __func__, + wbc->nr_to_write, inode->i_ino, ret); + dump_stack(); goto out_writepages; } if (ext4_should_order_data(inode)) { /* * With ordered mode we need to add - * the inode to the journal handle + * the inode to the journal handl * when we do block allocation. */ ret = ext4_jbd2_file_inode(handle, inode); @@ -2219,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping, ext4_journal_stop(handle); goto out_writepages; } - } - /* - * set the max dirty pages could be write at a time - * to fit into the reserved transaction credits - */ - if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) - wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; to_write -= wbc->nr_to_write; ret = mpage_da_writepages(mapping, wbc, - ext4_da_get_block_write); + ext4_da_get_block_write); ext4_journal_stop(handle); - if (wbc->nr_to_write) { + if (ret == MPAGE_DA_EXTENT_TAIL) { + /* + * got one extent now try with + * rest of the pages + */ + to_write += wbc->nr_to_write; + ret = 0; + } else if (wbc->nr_to_write) { /* * There is no more writeout needed * or we requested for a noblocking writeout @@ -2244,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping, wbc->nr_to_write = to_write; } -out_writepages: - wbc->nr_to_write = to_write; - if (range_start) + if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { + /* We skipped pages in this loop */ wbc->range_start = range_start; + wbc->nr_to_write = to_write + + wbc->pages_skipped - pages_skipped; + wbc->pages_skipped = pages_skipped; + goto restart_loop; + } + +out_writepages: + wbc->nr_to_write = to_write - nr_to_writebump; + wbc->range_start = range_start; return ret; } @@ -2280,8 +2420,11 @@ retry: } page = __grab_cache_page(mapping, index); - if (!page) - return -ENOMEM; + if (!page) { + ext4_journal_stop(handle); + ret = -ENOMEM; + goto out; + } *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, @@ -3434,6 +3577,9 @@ void ext4_truncate(struct inode *inode) * modify the block allocation tree. */ down_write(&ei->i_data_sem); + + ext4_discard_reservation(inode); + /* * The orphan list entry will now protect us from any crash which * occurs before the truncate completes, so it is now safe to propagate @@ -3503,8 +3649,6 @@ do_indirects: ; } - ext4_discard_reservation(inode); - up_write(&ei->i_data_sem); inode->i_mtime = inode->i_ctime = ext4_current_time(inode); ext4_mark_inode_dirty(handle, inode); @@ -3590,6 +3734,16 @@ static int __ext4_get_inode_loc(struct inode *inode, } if (!buffer_uptodate(bh)) { lock_buffer(bh); + + /* + * If the buffer has the write error flag, we have failed + * to write out another inode in the same block. In this + * case, we don't have to read the block because we may + * read the old inode data successfully. + */ + if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_uptodate(bh)) { /* someone brought it uptodate while we waited */ unlock_buffer(bh); @@ -4262,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, return 0; } +static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, + int chunk) +{ + int indirects; + + /* if nrblocks are contiguous */ + if (chunk) { + /* + * With N contiguous data blocks, it need at most + * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks + * 2 dindirect blocks + * 1 tindirect block + */ + indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); + return indirects + 3; + } + /* + * if nrblocks are not contiguous, worse case, each block touch + * a indirect block, and each indirect block touch a double indirect + * block, plus a triple indirect block + */ + indirects = nrblocks * 2 + 1; + return indirects; +} + +static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) + return ext4_indirect_trans_blocks(inode, nrblocks, 0); + return ext4_ext_index_trans_blocks(inode, nrblocks, 0); +} /* - * How many blocks doth make a writepage()? + * Account for index blocks, block groups bitmaps and block group + * descriptor blocks if modify datablocks and index blocks + * worse case, the indexs blocks spread over different block groups * - * With N blocks per page, it may be: - * N data blocks - * 2 indirect block - * 2 dindirect - * 1 tindirect - * N+5 bitmap blocks (from the above) - * N+5 group descriptor summary blocks - * 1 inode block - * 1 superblock. - * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files + * If datablocks are discontiguous, they are possible to spread over + * different block groups too. If they are contiugous, with flexbg, + * they could still across block group boundary. * - * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS - * - * With ordered or writeback data it's the same, less the N data blocks. + * Also account for superblock, inode, quota and xattr blocks + */ +int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) +{ + int groups, gdpblocks; + int idxblocks; + int ret = 0; + + /* + * How many index blocks need to touch to modify nrblocks? + * The "Chunk" flag indicating whether the nrblocks is + * physically contiguous on disk + * + * For Direct IO and fallocate, they calls get_block to allocate + * one single extent at a time, so they could set the "Chunk" flag + */ + idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); + + ret = idxblocks; + + /* + * Now let's see how many group bitmaps and group descriptors need + * to account + */ + groups = idxblocks; + if (chunk) + groups += 1; + else + groups += nrblocks; + + gdpblocks = groups; + if (groups > EXT4_SB(inode->i_sb)->s_groups_count) + groups = EXT4_SB(inode->i_sb)->s_groups_count; + if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) + gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; + + /* bitmaps and block group descriptor blocks */ + ret += groups + gdpblocks; + + /* Blocks for super block, inode, quota and xattr blocks */ + ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); + + return ret; +} + +/* + * Calulate the total number of credits to reserve to fit + * the modification of a single pages into a single transaction, + * which may include multiple chunks of block allocations. * - * If the inode's direct blocks can hold an integral number of pages then a - * page cannot straddle two indirect blocks, and we can only touch one indirect - * and dindirect block, and the "5" above becomes "3". + * This could be called via ext4_write_begin() * - * This still overestimates under most circumstances. If we were to pass the - * start and end offsets in here as well we could do block_to_path() on each - * block and work out the exact number of indirects which are touched. Pah. + * We need to consider the worse case, when + * one new block per extent. */ - int ext4_writepage_trans_blocks(struct inode *inode) { int bpp = ext4_journal_blocks_per_page(inode); - int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_writepage_trans_blocks(inode, bpp); + ret = ext4_meta_trans_blocks(inode, bpp, 0); + /* Account for data blocks for journalled mode */ if (ext4_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else - ret = 2 * (bpp + indirects) + 2; - -#ifdef CONFIG_QUOTA - /* We know that structure was already allocated during DQUOT_INIT so - * we will be updating only the data blocks + inodes */ - ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); -#endif - + ret += bpp; return ret; } /* + * Calculate the journal credits for a chunk of data modification. + * + * This is called from DIO, fallocate or whoever calling + * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. + * + * journal buffers for data blocks are not included here, as DIO + * and fallocate do no need to journal data buffers. + */ +int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) +{ + return ext4_meta_trans_blocks(inode, nrblocks, 1); +} + +/* * The caller must have previously called ext4_reserve_inode_write(). * Give this, we know that the caller already has write access to iloc->bh. */ diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8d141a25bbee..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (bh_uptodate_or_lock(bh[i])) continue; + spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); set_buffer_uptodate(bh[i]); unlock_buffer(bh[i]); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); continue; } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); get_bh(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); @@ -2477,7 +2480,7 @@ err_freesgi: int ext4_mb_init(struct super_block *sb, int needs_recovery) { struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned i; + unsigned i, j; unsigned offset; unsigned max; int ret; @@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; - i = sizeof(struct ext4_locality_group) * NR_CPUS; + i = sizeof(struct ext4_locality_group) * nr_cpu_ids; sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); if (sbi->s_locality_groups == NULL) { clear_opt(sbi->s_mount_opt, MBALLOC); @@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) kfree(sbi->s_mb_maxs); return -ENOMEM; } - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { struct ext4_locality_group *lg; lg = &sbi->s_locality_groups[i]; mutex_init(&lg->lg_mutex); - INIT_LIST_HEAD(&lg->lg_prealloc_list); + for (j = 0; j < PREALLOC_TB_SIZE; j++) + INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); spin_lock_init(&lg->lg_prealloc_lock); } @@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { unsigned int len = ac->ac_o_ex.fe_len; + ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); @@ -3277,14 +3282,45 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, } /* + * Return the prealloc space that have minimal distance + * from the goal block. @cpa is the prealloc + * space that is having currently known minimal distance + * from the goal block. + */ +static struct ext4_prealloc_space * +ext4_mb_check_group_pa(ext4_fsblk_t goal_block, + struct ext4_prealloc_space *pa, + struct ext4_prealloc_space *cpa) +{ + ext4_fsblk_t cur_distance, new_distance; + + if (cpa == NULL) { + atomic_inc(&pa->pa_count); + return pa; + } + cur_distance = abs(goal_block - cpa->pa_pstart); + new_distance = abs(goal_block - pa->pa_pstart); + + if (cur_distance < new_distance) + return cpa; + + /* drop the previous reference */ + atomic_dec(&cpa->pa_count); + atomic_inc(&pa->pa_count); + return pa; +} + +/* * search goal blocks in preallocated space */ static noinline_for_stack int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) { + int order, i; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; - struct ext4_prealloc_space *pa; + struct ext4_prealloc_space *pa, *cpa = NULL; + ext4_fsblk_t goal_block; /* only data can be preallocated */ if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) @@ -3322,22 +3358,38 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) lg = ac->ac_lg; if (lg == NULL) return 0; + order = fls(ac->ac_o_ex.fe_len) - 1; + if (order > PREALLOC_TB_SIZE - 1) + /* The max size of hash table is PREALLOC_TB_SIZE */ + order = PREALLOC_TB_SIZE - 1; + + goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + + ac->ac_g_ex.fe_start + + le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); + /* + * search for the prealloc space that is having + * minimal distance from the goal block. + */ + for (i = order; i < PREALLOC_TB_SIZE; i++) { + rcu_read_lock(); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], + pa_inode_list) { + spin_lock(&pa->pa_lock); + if (pa->pa_deleted == 0 && + pa->pa_free >= ac->ac_o_ex.fe_len) { - rcu_read_lock(); - list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { - spin_lock(&pa->pa_lock); - if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { - atomic_inc(&pa->pa_count); - ext4_mb_use_group_pa(ac, pa); + cpa = ext4_mb_check_group_pa(goal_block, + pa, cpa); + } spin_unlock(&pa->pa_lock); - ac->ac_criteria = 20; - rcu_read_unlock(); - return 1; } - spin_unlock(&pa->pa_lock); + rcu_read_unlock(); + } + if (cpa) { + ext4_mb_use_group_pa(ac, cpa); + ac->ac_criteria = 20; + return 1; } - rcu_read_unlock(); - return 0; } @@ -3560,6 +3612,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_free = pa->pa_len; atomic_set(&pa->pa_count, 1); spin_lock_init(&pa->pa_lock); + INIT_LIST_HEAD(&pa->pa_inode_list); pa->pa_deleted = 0; pa->pa_linear = 1; @@ -3580,10 +3633,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) list_add(&pa->pa_group_list, &grp->bb_prealloc_list); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); - spin_lock(pa->pa_obj_lock); - list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); - spin_unlock(pa->pa_obj_lock); - + /* + * We will later add the new pa to the right bucket + * after updating the pa_free in ext4_mb_release_context + */ return 0; } @@ -3733,20 +3786,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - /* error handling here */ - ext4_mb_release_desc(&e4b); - BUG_ON(bitmap_bh == NULL); + ext4_error(sb, __func__, "Error in reading block " + "bitmap for %lu\n", group); + return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); - BUG_ON(err != 0); /* error handling here */ + if (err) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + put_bh(bitmap_bh); + return 0; + } if (needed == 0) needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; - grp = ext4_get_group_info(sb, group); INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: ext4_lock_group(sb, group); @@ -3903,13 +3959,18 @@ repeat: ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); err = ext4_mb_load_buddy(sb, group, &e4b); - BUG_ON(err != 0); /* error handling here */ + if (err) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + continue; + } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - /* error handling here */ + ext4_error(sb, __func__, "Error in reading block " + "bitmap for %lu\n", group); ext4_mb_release_desc(&e4b); - BUG_ON(bitmap_bh == NULL); + continue; } ext4_lock_group(sb, group); @@ -4112,22 +4173,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, } +static noinline_for_stack void +ext4_mb_discard_lg_preallocations(struct super_block *sb, + struct ext4_locality_group *lg, + int order, int total_entries) +{ + ext4_group_t group = 0; + struct ext4_buddy e4b; + struct list_head discard_list; + struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; + + mb_debug("discard locality group preallocation\n"); + + INIT_LIST_HEAD(&discard_list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + spin_lock(&lg->lg_prealloc_lock); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], + pa_inode_list) { + spin_lock(&pa->pa_lock); + if (atomic_read(&pa->pa_count)) { + /* + * This is the pa that we just used + * for block allocation. So don't + * free that + */ + spin_unlock(&pa->pa_lock); + continue; + } + if (pa->pa_deleted) { + spin_unlock(&pa->pa_lock); + continue; + } + /* only lg prealloc space */ + BUG_ON(!pa->pa_linear); + + /* seems this one can be freed ... */ + pa->pa_deleted = 1; + spin_unlock(&pa->pa_lock); + + list_del_rcu(&pa->pa_inode_list); + list_add(&pa->u.pa_tmp_list, &discard_list); + + total_entries--; + if (total_entries <= 5) { + /* + * we want to keep only 5 entries + * allowing it to grow to 8. This + * mak sure we don't call discard + * soon for this list. + */ + break; + } + } + spin_unlock(&lg->lg_prealloc_lock); + + list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); + if (ext4_mb_load_buddy(sb, group, &e4b)) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + continue; + } + ext4_lock_group(sb, group); + list_del(&pa->pa_group_list); + ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_unlock_group(sb, group); + + ext4_mb_release_desc(&e4b); + list_del(&pa->u.pa_tmp_list); + call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); + } + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); +} + +/* + * We have incremented pa_count. So it cannot be freed at this + * point. Also we hold lg_mutex. So no parallel allocation is + * possible from this lg. That means pa_free cannot be updated. + * + * A parallel ext4_mb_discard_group_preallocations is possible. + * which can cause the lg_prealloc_list to be updated. + */ + +static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) +{ + int order, added = 0, lg_prealloc_count = 1; + struct super_block *sb = ac->ac_sb; + struct ext4_locality_group *lg = ac->ac_lg; + struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa; + + order = fls(pa->pa_free) - 1; + if (order > PREALLOC_TB_SIZE - 1) + /* The max size of hash table is PREALLOC_TB_SIZE */ + order = PREALLOC_TB_SIZE - 1; + /* Add the prealloc space to lg */ + rcu_read_lock(); + list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], + pa_inode_list) { + spin_lock(&tmp_pa->pa_lock); + if (tmp_pa->pa_deleted) { + spin_unlock(&pa->pa_lock); + continue; + } + if (!added && pa->pa_free < tmp_pa->pa_free) { + /* Add to the tail of the previous entry */ + list_add_tail_rcu(&pa->pa_inode_list, + &tmp_pa->pa_inode_list); + added = 1; + /* + * we want to count the total + * number of entries in the list + */ + } + spin_unlock(&tmp_pa->pa_lock); + lg_prealloc_count++; + } + if (!added) + list_add_tail_rcu(&pa->pa_inode_list, + &lg->lg_prealloc_list[order]); + rcu_read_unlock(); + + /* Now trim the list to be not more than 8 elements */ + if (lg_prealloc_count > 8) { + ext4_mb_discard_lg_preallocations(sb, lg, + order, lg_prealloc_count); + return; + } + return ; +} + /* * release all resource we used in allocation */ static int ext4_mb_release_context(struct ext4_allocation_context *ac) { - if (ac->ac_pa) { - if (ac->ac_pa->pa_linear) { + struct ext4_prealloc_space *pa = ac->ac_pa; + if (pa) { + if (pa->pa_linear) { /* see comment in ext4_mb_use_group_pa() */ - spin_lock(&ac->ac_pa->pa_lock); - ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; - ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; - spin_unlock(&ac->ac_pa->pa_lock); + spin_lock(&pa->pa_lock); + pa->pa_pstart += ac->ac_b_ex.fe_len; + pa->pa_lstart += ac->ac_b_ex.fe_len; + pa->pa_free -= ac->ac_b_ex.fe_len; + pa->pa_len -= ac->ac_b_ex.fe_len; + spin_unlock(&pa->pa_lock); + /* + * We want to add the pa to the right bucket. + * Remove it from the list and while adding + * make sure the list to which we are adding + * doesn't grow big. + */ + if (likely(pa->pa_free)) { + spin_lock(pa->pa_obj_lock); + list_del_rcu(&pa->pa_inode_list); + spin_unlock(pa->pa_obj_lock); + ext4_mb_add_n_trim(ac); + } } - ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); + ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_page) page_cache_release(ac->ac_bitmap_page); @@ -4420,11 +4627,15 @@ do_more: count -= overflow; } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) + if (!bitmap_bh) { + err = -EIO; goto error_return; + } gdp = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!gdp) + if (!gdp) { + err = -EIO; goto error_return; + } if (in_range(ext4_block_bitmap(sb, gdp), block, count) || in_range(ext4_inode_bitmap(sb, gdp), block, count) || diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index bfe6add46bcf..c7c9906c2a75 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -164,11 +164,17 @@ struct ext4_free_extent { * Locality group: * we try to group all related changes together * so that writeback can flush/allocate them together as well + * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC + * (512). We store prealloc space into the hash based on the pa_free blocks + * order value.ie, fls(pa_free)-1; */ +#define PREALLOC_TB_SIZE 10 struct ext4_locality_group { /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ + /* to serialize allocates */ + struct mutex lg_mutex; + /* list of preallocations */ + struct list_head lg_prealloc_list[PREALLOC_TB_SIZE]; spinlock_t lg_prealloc_lock; }; diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..46fc0b5b12ba 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, * credit. But below we try to not accumalate too much * of them by restarting the journal. */ - needed = ext4_ext_calc_credits_for_insert(inode, path); + needed = ext4_ext_calc_credits_for_single_extent(inode, + lb->last_block - lb->first_block + 1, path); /* * Make sure the credit we accumalated is not really high diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f000fbe2cd93..b3d35604ea18 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || - outside(itend - 1, start, end)) + outside(itend - 1, start, end)) ext4_warning(sb, __func__, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); @@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb, (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || - inside(itend - 1, start, metaend)) + inside(itend - 1, start, metaend)) ext4_warning(sb, __func__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", @@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, if (err) { if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) return err; - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) return err; - } + } return 0; } @@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. - */ + /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { ext4_warning(sb, __func__, @@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return 0; exit_inode: - //ext4_journal_release_buffer(handle, iloc.bh); + /* ext4_journal_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: - //ext4_journal_release_buffer(handle, dind); + /* ext4_journal_release_buffer(handle, dind); */ exit_primary: - //ext4_journal_release_buffer(handle, *primary); + /* ext4_journal_release_buffer(handle, *primary); */ exit_sbh: - //ext4_journal_release_buffer(handle, *primary); + /* ext4_journal_release_buffer(handle, *primary); */ exit_dind: brelse(dind); exit_bh: @@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if (reserved_gdb || gdb_off == 0) { if (!EXT4_HAS_COMPAT_FEATURE(sb, - EXT4_FEATURE_COMPAT_RESIZE_INODE)){ + EXT4_FEATURE_COMPAT_RESIZE_INODE) + || !le16_to_cpu(es->s_reserved_gdt_blocks)) { ext4_warning(sb, __func__, "No reserved GDT blocks, can't resize"); return -EPERM; @@ -818,12 +819,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; - /* - * We will only either add reserved group blocks to a backup group - * or remove reserved blocks for the first group in a new group block. - * Doing both would be mean more complex code, and sane people don't - * use non-sparse filesystems anymore. This is already checked above. - */ + /* + * We will only either add reserved group blocks to a backup group + * or remove reserved blocks for the first group in a new group block. + * Doing both would be mean more complex code, and sane people don't + * use non-sparse filesystems anymore. This is already checked above. + */ if (gdb_off) { primary = sbi->s_group_desc[gdb_num]; if ((err = ext4_journal_get_write_access(handle, primary))) @@ -835,24 +836,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } else if ((err = add_new_gdb(handle, inode, input, &primary))) goto exit_journal; - /* - * OK, now we've set up the new group. Time to make it active. - * - * Current kernels don't lock all allocations via lock_super(), - * so we have to be safe wrt. concurrent accesses the group - * data. So we need to be careful to set all of the relevant - * group descriptor data etc. *before* we enable the group. - * - * The key field here is sbi->s_groups_count: as long as - * that retains its old value, nobody is going to access the new - * group. - * - * So first we update all the descriptor metadata for the new - * group; then we update the total disk blocks count; then we - * update the groups count to enable the group; then finally we - * update the free space counts so that the system can start - * using the new disk blocks. - */ + /* + * OK, now we've set up the new group. Time to make it active. + * + * Current kernels don't lock all allocations via lock_super(), + * so we have to be safe wrt. concurrent accesses the group + * data. So we need to be careful to set all of the relevant + * group descriptor data etc. *before* we enable the group. + * + * The key field here is sbi->s_groups_count: as long as + * that retains its old value, nobody is going to access the new + * group. + * + * So first we update all the descriptor metadata for the new + * group; then we update the total disk blocks count; then we + * update the groups count to enable the group; then finally we + * update the free space counts so that the system can start + * using the new disk blocks. + */ /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)((char *)primary->b_data + @@ -946,7 +947,8 @@ exit_put: return err; } /* ext4_group_add */ -/* Extend the filesystem to the new number of blocks specified. This entry +/* + * Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" * for emergencies (because it has no dependencies on reserved blocks). @@ -1024,7 +1026,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, o_blocks_count + add -1); + bh = sb_bread(sb, o_blocks_count + add - 1); if (!bh) { ext4_warning(sb, __func__, "can't read last block, resize aborted"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1e69f29a8c55..566344b926b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_create_journal(struct super_block *, struct ext4_super_block *, unsigned int); -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync); -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es); -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es); +static void ext4_commit_super(struct super_block *sb, + struct ext4_super_block *es, int sync); +static void ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es); +static void ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static const char *ext4_decode_error(struct super_block * sb, int errno, +static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]); -static int ext4_remount (struct super_block * sb, int * flags, char * data); -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); +static int ext4_remount(struct super_block *sb, int *flags, char *data); +static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static void ext4_unlockfs(struct super_block *sb); -static void ext4_write_super (struct super_block * sb); +static void ext4_write_super(struct super_block *sb); static void ext4_write_super_lockfs(struct super_block *sb); @@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return; - if (!test_opt (sb, ERRORS_CONT)) { + if (!test_opt(sb, ERRORS_CONT)) { journal_t *journal = EXT4_SB(sb)->s_journal; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; if (journal) jbd2_journal_abort(journal, -EIO); } - if (test_opt (sb, ERRORS_RO)) { - printk (KERN_CRIT "Remounting filesystem read-only\n"); + if (test_opt(sb, ERRORS_RO)) { + printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } ext4_commit_super(sb, es, 1); @@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -void ext4_error (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_error(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); @@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function, ext4_handle_error(sb); } -static const char *ext4_decode_error(struct super_block * sb, int errno, +static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]) { char *errstr = NULL; @@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno, /* __ext4_std_error decodes expected errors from journaling functions * automatically and invokes the appropriate error response. */ -void __ext4_std_error (struct super_block * sb, const char * function, - int errno) +void __ext4_std_error(struct super_block *sb, const char *function, int errno) { char nbuf[16]; const char *errstr; @@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function, return; errstr = ext4_decode_error(sb, errno, nbuf); - printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", - sb->s_id, function, errstr); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", + sb->s_id, function, errstr); ext4_handle_error(sb); } @@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function, * case we take the easy way out and panic immediately. */ -void ext4_abort (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_abort(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; - printk (KERN_CRIT "ext4_abort called.\n"); + printk(KERN_CRIT "ext4_abort called.\n"); va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); @@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } -void ext4_warning (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_warning(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; @@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) } } -static void ext4_put_super (struct super_block * sb) +static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -570,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) #endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; + ei->vfs_inode.i_data.writeback_index = 0; memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); @@ -647,7 +646,8 @@ static void ext4_clear_inode(struct inode *inode) &EXT4_I(inode)->jinode); } -static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) +static inline void ext4_show_quota_options(struct seq_file *seq, + struct super_block *sb) { #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -822,8 +822,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, } #ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") -#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) +#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) static int ext4_dquot_initialize(struct inode *inode, int type); static int ext4_dquot_drop(struct inode *inode); @@ -991,12 +991,12 @@ static ext4_fsblk_t get_sb_block(void **data) return sb_block; } -static int parse_options (char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, - ext4_fsblk_t *n_blocks_count, int is_remount) +static int parse_options(char *options, struct super_block *sb, + unsigned int *inum, unsigned long *journal_devnum, + ext4_fsblk_t *n_blocks_count, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char * p; + char *p; substring_t args[MAX_OPT_ARGS]; int data_opt = 0; int option; @@ -1009,7 +1009,7 @@ static int parse_options (char *options, struct super_block *sb, if (!options) return 1; - while ((p = strsep (&options, ",")) != NULL) { + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; @@ -1017,16 +1017,16 @@ static int parse_options (char *options, struct super_block *sb, token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: - clear_opt (sbi->s_mount_opt, MINIX_DF); + clear_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_minix_df: - set_opt (sbi->s_mount_opt, MINIX_DF); + set_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_grpid: - set_opt (sbi->s_mount_opt, GRPID); + set_opt(sbi->s_mount_opt, GRPID); break; case Opt_nogrpid: - clear_opt (sbi->s_mount_opt, GRPID); + clear_opt(sbi->s_mount_opt, GRPID); break; case Opt_resuid: if (match_int(&args[0], &option)) @@ -1043,41 +1043,41 @@ static int parse_options (char *options, struct super_block *sb, /* *sb_block = match_int(&args[0]); */ break; case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); + clear_opt(sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_RO); + set_opt(sbi->s_mount_opt, ERRORS_PANIC); break; case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt(sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_PANIC); + set_opt(sbi->s_mount_opt, ERRORS_RO); break; case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_RO); + clear_opt(sbi->s_mount_opt, ERRORS_PANIC); + set_opt(sbi->s_mount_opt, ERRORS_CONT); break; case Opt_nouid32: - set_opt (sbi->s_mount_opt, NO_UID32); + set_opt(sbi->s_mount_opt, NO_UID32); break; case Opt_nocheck: - clear_opt (sbi->s_mount_opt, CHECK); + clear_opt(sbi->s_mount_opt, CHECK); break; case Opt_debug: - set_opt (sbi->s_mount_opt, DEBUG); + set_opt(sbi->s_mount_opt, DEBUG); break; case Opt_oldalloc: - set_opt (sbi->s_mount_opt, OLDALLOC); + set_opt(sbi->s_mount_opt, OLDALLOC); break; case Opt_orlov: - clear_opt (sbi->s_mount_opt, OLDALLOC); + clear_opt(sbi->s_mount_opt, OLDALLOC); break; #ifdef CONFIG_EXT4DEV_FS_XATTR case Opt_user_xattr: - set_opt (sbi->s_mount_opt, XATTR_USER); + set_opt(sbi->s_mount_opt, XATTR_USER); break; case Opt_nouser_xattr: - clear_opt (sbi->s_mount_opt, XATTR_USER); + clear_opt(sbi->s_mount_opt, XATTR_USER); break; #else case Opt_user_xattr: @@ -1115,7 +1115,7 @@ static int parse_options (char *options, struct super_block *sb, "journal on remount\n"); return 0; } - set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); + set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; case Opt_journal_inum: if (is_remount) { @@ -1145,7 +1145,7 @@ static int parse_options (char *options, struct super_block *sb, set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); break; case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); + set_opt(sbi->s_mount_opt, NOLOAD); break; case Opt_commit: if (match_int(&args[0], &option)) @@ -1331,7 +1331,7 @@ set_qf_format: "on this filesystem, use tune2fs\n"); return 0; } - set_opt (sbi->s_mount_opt, EXTENTS); + set_opt(sbi->s_mount_opt, EXTENTS); break; case Opt_noextents: /* @@ -1348,7 +1348,7 @@ set_qf_format: "-o noextents options\n"); return 0; } - clear_opt (sbi->s_mount_opt, EXTENTS); + clear_opt(sbi->s_mount_opt, EXTENTS); break; case Opt_i_version: set_opt(sbi->s_mount_opt, I_VERSION); @@ -1374,9 +1374,9 @@ set_qf_format: set_opt(sbi->s_mount_opt, DELALLOC); break; default: - printk (KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); + printk(KERN_ERR + "EXT4-fs: Unrecognized mount option \"%s\" " + "or missing value\n", p); return 0; } } @@ -1423,31 +1423,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk (KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); + printk(KERN_ERR "EXT4-fs warning: revision level too high, " + "forcing read-only mode\n"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk (KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk (KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk (KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: checktime reached, " + "running e2fsck is recommended\n"); #if 0 /* @@@ We _will_ want to clear the valid bit if we find * inconsistencies, to force a fsck at reboot. But for @@ -1506,14 +1506,13 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / groups_per_flex; - sbi->s_flex_groups = kmalloc(flex_group_count * + sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory\n"); + printk(KERN_ERR "EXT4-fs: not enough memory for " + "%lu flex groups\n", flex_group_count); goto failed; } - memset(sbi->s_flex_groups, 0, flex_group_count * - sizeof(struct flex_groups)); gdp = ext4_get_group_desc(sb, 1, &bh); block_bitmap = ext4_block_bitmap(sb, gdp) - 1; @@ -1597,16 +1596,14 @@ static int ext4_check_descriptors(struct super_block *sb) (EXT4_BLOCKS_PER_GROUP(sb) - 1); block_bitmap = ext4_block_bitmap(sb, gdp); - if (block_bitmap < first_block || block_bitmap > last_block) - { + if (block_bitmap < first_block || block_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Block bitmap for group %lu not in group " "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); - if (inode_bitmap < first_block || inode_bitmap > last_block) - { + if (inode_bitmap < first_block || inode_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Inode bitmap for group %lu not in group " "(block %llu)!", i, inode_bitmap); @@ -1614,26 +1611,28 @@ static int ext4_check_descriptors(struct super_block *sb) } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || - inode_table + sbi->s_itb_per_group - 1 > last_block) - { + inode_table + sbi->s_itb_per_group - 1 > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Inode table for group %lu not in group " "(block %llu)!", i, inode_table); return 0; } + spin_lock(sb_bgl_lock(sbi, i)); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Checksum for group %lu failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); - return 0; + if (!(sb->s_flags & MS_RDONLY)) + return 0; } + spin_unlock(sb_bgl_lock(sbi, i)); if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); + sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1654,8 +1653,8 @@ static int ext4_check_descriptors(struct super_block *sb) * e2fsck was run on this filesystem, and it must have already done the orphan * inode cleanup for us, so we can safely abort without any further action. */ -static void ext4_orphan_cleanup (struct super_block * sb, - struct ext4_super_block * es) +static void ext4_orphan_cleanup(struct super_block *sb, + struct ext4_super_block *es) { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; @@ -1732,7 +1731,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, iput(inode); /* The delete magic happens here! */ } -#define PLURAL(x) (x), ((x)==1) ? "" : "s" +#define PLURAL(x) (x), ((x) == 1) ? "" : "s" if (nr_orphans) printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", @@ -1899,12 +1898,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) return 0; } -static int ext4_fill_super (struct super_block *sb, void *data, int silent) +static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) { - struct buffer_head * bh; + struct buffer_head *bh; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi; ext4_fsblk_t block; @@ -1953,7 +1952,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logical_sb_block))) { - printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); + printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); goto out_fail; } /* @@ -2026,8 +2025,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) + if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -2102,7 +2101,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - brelse (bh); + brelse(bh); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); @@ -2114,8 +2113,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk (KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); + printk(KERN_ERR + "EXT4-fs: Magic mismatch, very weird !\n"); goto failed_mount; } } @@ -2132,9 +2131,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { - printk (KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", - sbi->s_inode_size); + printk(KERN_ERR + "EXT4-fs: unsupported inode size: %d\n", + sbi->s_inode_size); goto failed_mount; } if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) @@ -2166,20 +2165,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - for (i=0; i < 4; i++) + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", - sbi->s_blocks_per_group); + printk(KERN_ERR + "EXT4-fs: #blocks per group too big: %lu\n", + sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", - sbi->s_inodes_per_group); + printk(KERN_ERR + "EXT4-fs: #inodes per group too big: %lu\n", + sbi->s_inodes_per_group); goto failed_mount; } @@ -2213,10 +2212,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), + sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk (KERN_ERR "EXT4-fs: not enough memory\n"); + printk(KERN_ERR "EXT4-fs: not enough memory\n"); goto failed_mount; } @@ -2226,13 +2225,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk (KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); + printk(KERN_ERR "EXT4-fs: " + "can't read group descriptor %d\n", i); db_count = i; goto failed_mount2; } } - if (!ext4_check_descriptors (sb)) { + if (!ext4_check_descriptors(sb)) { printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); goto failed_mount2; } @@ -2308,11 +2307,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_journal->j_failed_commit) { printk(KERN_CRIT "EXT4-fs error (device %s): " "ext4_fill_super: Journal transaction " - "%u is corrupt\n", sb->s_id, + "%u is corrupt\n", sb->s_id, EXT4_SB(sb)->s_journal->j_failed_commit); - if (test_opt (sb, ERRORS_RO)) { - printk (KERN_CRIT - "Mounting filesystem read-only\n"); + if (test_opt(sb, ERRORS_RO)) { + printk(KERN_CRIT + "Mounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -2332,9 +2331,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount3; } else { if (!silent) - printk (KERN_ERR - "ext4: No journal on filesystem on %s\n", - sb->s_id); + printk(KERN_ERR + "ext4: No journal on filesystem on %s\n", + sb->s_id); goto failed_mount3; } @@ -2418,7 +2417,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount4; } - ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -2457,12 +2456,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) - printk (KERN_INFO "EXT4-fs: recovery complete.\n"); + printk(KERN_INFO "EXT4-fs: recovery complete.\n"); ext4_mark_recovery_complete(sb, es); - printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); + printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", + test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": + test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": + "writeback"); if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " @@ -2575,14 +2574,14 @@ static journal_t *ext4_get_journal(struct super_block *sb, static journal_t *ext4_get_dev_journal(struct super_block *sb, dev_t j_dev) { - struct buffer_head * bh; + struct buffer_head *bh; journal_t *journal; ext4_fsblk_t start; ext4_fsblk_t len; int hblock, blocksize; ext4_fsblk_t sb_block; unsigned long offset; - struct ext4_super_block * es; + struct ext4_super_block *es; struct block_device *bdev; bdev = ext4_blkdev_get(j_dev); @@ -2697,8 +2696,8 @@ static int ext4_load_journal(struct super_block *sb, "unavailable, cannot proceed.\n"); return -EROFS; } - printk (KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); + printk(KERN_INFO "EXT4-fs: write access will " + "be enabled during recovery.\n"); } } @@ -2751,8 +2750,8 @@ static int ext4_load_journal(struct super_block *sb, return 0; } -static int ext4_create_journal(struct super_block * sb, - struct ext4_super_block * es, +static int ext4_create_journal(struct super_block *sb, + struct ext4_super_block *es, unsigned int journal_inum) { journal_t *journal; @@ -2793,9 +2792,8 @@ static int ext4_create_journal(struct super_block * sb, return 0; } -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync) +static void ext4_commit_super(struct super_block *sb, + struct ext4_super_block *es, int sync) { struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; @@ -2816,8 +2814,8 @@ static void ext4_commit_super (struct super_block * sb, * remounting) the filesystem readonly, then we will end up with a * consistent fs on disk. Record that fact. */ -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es) +static void ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es) { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -2839,8 +2837,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb, * has recorded an error from a previous lifetime, move that error to the * main filesystem now. */ -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es) +static void ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es) { journal_t *journal; int j_errno; @@ -2865,7 +2863,7 @@ static void ext4_clear_journal_err(struct super_block * sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super (sb, es, 1); + ext4_commit_super(sb, es, 1); jbd2_journal_clear_err(journal); } @@ -2898,7 +2896,7 @@ int ext4_force_commit(struct super_block *sb) * This implicitly triggers the writebehind on sync(). */ -static void ext4_write_super (struct super_block * sb) +static void ext4_write_super(struct super_block *sb) { if (mutex_trylock(&sb->s_lock) != 0) BUG(); @@ -2954,13 +2952,14 @@ static void ext4_unlockfs(struct super_block *sb) } } -static int ext4_remount (struct super_block * sb, int * flags, char * data) +static int ext4_remount(struct super_block *sb, int *flags, char *data) { - struct ext4_super_block * es; + struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext4_mount_options old_opts; + ext4_group_t g; int err; #ifdef CONFIG_QUOTA int i; @@ -3039,6 +3038,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) } /* + * Make sure the group descriptor checksums + * are sane. If they aren't, refuse to + * remount r/w. + */ + for (g = 0; g < sbi->s_groups_count; g++) { + struct ext4_group_desc *gdp = + ext4_get_group_desc(sb, g, NULL); + + if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { + printk(KERN_ERR + "EXT4-fs: ext4_remount: " + "Checksum for group %lu failed (%u!=%u)\n", + g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), + le16_to_cpu(gdp->bg_checksum)); + err = -EINVAL; + goto restore_opts; + } + } + + /* * If we have an unprocessed orphan list hanging * around from a previously readonly bdev mount, * require a full umount/remount for now. @@ -3063,7 +3082,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_state = le16_to_cpu(es->s_state); if ((err = ext4_group_extend(sb, es, n_blocks_count))) goto restore_opts; - if (!ext4_setup_super (sb, es, 0)) + if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; } } @@ -3093,7 +3112,7 @@ restore_opts: return err; } -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) +static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -3331,12 +3350,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, } /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { - /* Quotafile not of fs root? */ + /* Quotafile not in fs root? */ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING "EXT4-fs: Quota file not on filesystem root. " "Journaled quota will not work.\n"); - } + } /* * When we journal data on quota file, we have to flush journal to see diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 93c5fdcdad2e..8954208b4893 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, char *name = entry->e_name; int n; - for (n=0; n < entry->e_name_len; n++) { + for (n = 0; n < entry->e_name_len; n++) { hash = (hash << NAME_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ *name++; diff --git a/fs/fat/file.c b/fs/fat/file.c index 8707a8cfa02c..ddde37025ca6 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -313,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) return 0; } +#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) + int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -336,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) /* Check for setting the inode time. */ ia_valid = attr->ia_valid; - if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { + if (ia_valid & TIMES_SET_FLAGS) { if (fat_allow_set_time(sbi, inode)) - attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); + attr->ia_valid &= ~TIMES_SET_FLAGS; } error = inode_change_ok(inode, attr); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6d266d793e2c..80ff3381fa21 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) struct buffer_head *bh; struct msdos_dir_entry *raw_entry; loff_t i_pos; - int err = 0; + int err; retry: i_pos = MSDOS_I(inode)->i_pos; if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) return 0; - lock_super(sb); bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); if (!bh) { printk(KERN_ERR "FAT: unable to read inode block " "for updating (i_pos %lld)\n", i_pos); - err = -EIO; - goto out; + return -EIO; } spin_lock(&sbi->inode_hash_lock); if (i_pos != MSDOS_I(inode)->i_pos) { spin_unlock(&sbi->inode_hash_lock); brelse(bh); - unlock_super(sb); goto retry; } @@ -607,11 +604,10 @@ retry: } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); + err = 0; if (wait) err = sync_dirty_buffer(bh); brelse(bh); -out: - unlock_super(sb); return err; } diff --git a/fs/inode.c b/fs/inode.c index b6726f644530..0487ddba1397 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb) mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; + mapping->writeback_index = 0; /* * If the block_device provides a backing_dev_info for client diff --git a/fs/ioprio.c b/fs/ioprio.c index c4a1c3c65aac..da3cc460d4df 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) pgrp = task_pgrp(current); else pgrp = find_vpid(who); - do_each_pid_task(pgrp, PIDTYPE_PGID, p) { + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { ret = set_task_ioprio(p, ioprio); if (ret) break; - } while_each_pid_task(pgrp, PIDTYPE_PGID, p); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case IOPRIO_WHO_USER: if (!who) @@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); - do_each_pid_task(pgrp, PIDTYPE_PGID, p) { + do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) continue; @@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who) ret = tmpio; else ret = ioprio_best(ret, tmpio); - } while_each_pid_task(pgrp, PIDTYPE_PGID, p); + } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case IOPRIO_WHO_USER: if (!who) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 2eccbfaa1d48..ae08c057e751 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -221,7 +221,7 @@ write_out_data: * blocking lock_buffer(). */ if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ @@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); } if (unlikely(!buffer_uptodate(bh))) { - if (TestSetPageLocked(bh->b_page)) { + if (!trylock_page(bh->b_page)) { spin_unlock(&journal->j_list_lock); lock_page(bh->b_page); spin_lock(&journal->j_list_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 8dee32007500..0540ca27a446 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks) goto out; } - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_map_acquire(&handle->h_lockdep_map); out: return handle; @@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); + lock_map_release(&handle->h_lockdep_map); jbd_free_handle(handle); return err; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f8b3be873226..f2ad061e95ec 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal, jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); - if (!ret) - ret = err; + if (err) { + /* + * Because AS_EIO is cleared by + * wait_on_page_writeback_range(), set it again so + * that user process can get -EIO from fsync(). + */ + set_bit(AS_EIO, + &jinode->i_vfs_inode->i_mapping->flags); + + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -670,8 +680,14 @@ start_journal_io: * commit block, which happens below in such setting. */ err = journal_finish_inode_data_buffers(journal, commit_transaction); - if (err) - jbd2_journal_abort(journal, err); + if (err) { + char b[BDEVNAME_SIZE]; + + printk(KERN_WARNING + "JBD2: Detected IO errors while flushing file data " + "on %s\n", bdevname(journal->j_fs_dev, b)); + err = 0; + } /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b26c6d9fe6ae..8207a01c4edb 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features); EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_destroy); -EXPORT_SYMBOL(jbd2_journal_update_superblock); EXPORT_SYMBOL(jbd2_journal_abort); EXPORT_SYMBOL(jbd2_journal_errno); EXPORT_SYMBOL(jbd2_journal_ack_err); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4f7cadbb19fa..e5d540588fa9 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) goto out; } - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_map_acquire(&handle->h_lockdep_map); out: return handle; } @@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); + lock_map_release(&handle->h_lockdep_map); jbd2_free_handle(handle); return err; diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h @@ -12,7 +12,6 @@ #ifndef _JFFS2_FS_I #define _JFFS2_FS_I -#include <linux/version.h> #include <linux/rbtree.h> #include <linux/posix_acl.h> #include <linux/mutex.h> diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 399444639337..4a714f64515b 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); resp->cookie = argp->cookie; @@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: LOCK called\n"); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76019d2ff72d..76262c1986f2 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); resp->cookie = argp->cookie; @@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: LOCK called\n"); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8478fc25daee..46763d1cd397 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static match_table_t __initdata tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 33bfcf09db46..9dc036f18356 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp) /* Look up the dentry */ err = path_lookup(nxp->ex_path, 0, &nd); if (err) - goto out_unlock; + goto out_put_clp; err = -EINVAL; exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); @@ -1090,9 +1090,9 @@ finish: exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) cache_put(&fsid_key->h, &svc_expkey_cache); - if (clp) - auth_domain_put(clp); path_put(&nd.path); +out_put_clp: + auth_domain_put(clp); out_unlock: exp_writeunlock(); out: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index eef1629806f5..2e51adac65de 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -851,7 +851,7 @@ struct nfsd4_operation { static struct nfsd4_operation nfsd4_ops[]; -static inline char *nfsd4_op_name(unsigned opnum); +static const char *nfsd4_op_name(unsigned opnum); /* * COMPOUND call. @@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { }, }; -static inline char * -nfsd4_op_name(unsigned opnum) +static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 00e9ccde8e42..b38f944f0667 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1194,7 +1194,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 33ff314cc507..9669541d0119 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -665,7 +665,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 790defb847e7..17d32ca6bc35 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, // TODO: Consider moving this lot to a separate function! (AIA) handle_name: { - struct dentry *real_dent, *new_dent; MFT_RECORD *m; ntfs_attr_search_ctx *ctx; ntfs_inode *ni = NTFS_I(dent_inode); @@ -255,93 +254,9 @@ handle_name: } nls_name.hash = full_name_hash(nls_name.name, nls_name.len); - /* - * Note: No need for dent->d_lock lock as i_mutex is held on the - * parent inode. - */ - - /* Does a dentry matching the nls_name exist already? */ - real_dent = d_lookup(dent->d_parent, &nls_name); - /* If not, create it now. */ - if (!real_dent) { - real_dent = d_alloc(dent->d_parent, &nls_name); - kfree(nls_name.name); - if (!real_dent) { - err = -ENOMEM; - goto err_out; - } - new_dent = d_splice_alias(dent_inode, real_dent); - if (new_dent) - dput(real_dent); - else - new_dent = real_dent; - ntfs_debug("Done. (Created new dentry.)"); - return new_dent; - } + dent = d_add_ci(dent, dent_inode, &nls_name); kfree(nls_name.name); - /* Matching dentry exists, check if it is negative. */ - if (real_dent->d_inode) { - if (unlikely(real_dent->d_inode != dent_inode)) { - /* This can happen because bad inodes are unhashed. */ - BUG_ON(!is_bad_inode(dent_inode)); - BUG_ON(!is_bad_inode(real_dent->d_inode)); - } - /* - * Already have the inode and the dentry attached, decrement - * the reference count to balance the ntfs_iget() we did - * earlier on. We found the dentry using d_lookup() so it - * cannot be disconnected and thus we do not need to worry - * about any NFS/disconnectedness issues here. - */ - iput(dent_inode); - ntfs_debug("Done. (Already had inode and dentry.)"); - return real_dent; - } - /* - * Negative dentry: instantiate it unless the inode is a directory and - * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), - * in which case d_move() that in place of the found dentry. - */ - if (!S_ISDIR(dent_inode->i_mode)) { - /* Not a directory; everything is easy. */ - d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative file dentry.)"); - return real_dent; - } - spin_lock(&dcache_lock); - if (list_empty(&dent_inode->i_dentry)) { - /* - * Directory without a 'disconnected' dentry; we need to do - * d_instantiate() by hand because it takes dcache_lock which - * we already hold. - */ - list_add(&real_dent->d_alias, &dent_inode->i_dentry); - real_dent->d_inode = dent_inode; - spin_unlock(&dcache_lock); - security_d_instantiate(real_dent, dent_inode); - ntfs_debug("Done. (Already had negative directory dentry.)"); - return real_dent; - } - /* - * Directory with a 'disconnected' dentry; get a reference to the - * 'disconnected' dentry. - */ - new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, - d_alias); - dget_locked(new_dent); - spin_unlock(&dcache_lock); - /* Do security vodoo. */ - security_d_instantiate(real_dent, dent_inode); - /* Move new_dent in place of real_dent. */ - d_move(new_dent, real_dent); - /* Balance the ntfs_iget() we did above. */ - iput(dent_inode); - /* Throw away real_dent. */ - dput(real_dent); - /* Use new_dent as the actual dentry. */ - ntfs_debug("Done. (Already had negative, disconnected directory " - "dentry.)"); - return new_dent; + return dent; eio_err_out: ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c @@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) " message id: %d\n" " message type: %u\n" " message key: 0x%08x\n" - " sock acquiry: %lu.%lu\n" - " send start: %lu.%lu\n" - " wait start: %lu.%lu\n", + " sock acquiry: %lu.%ld\n" + " send start: %lu.%ld\n" + " wait start: %lu.%ld\n", nst, (unsigned long)nst->st_task->pid, (unsigned long)nst->st_task->tgid, nst->st_task->comm, nst->st_node, nst->st_sc, nst->st_id, nst->st_msg_type, nst->st_msg_key, nst->st_sock_time.tv_sec, - (unsigned long)nst->st_sock_time.tv_usec, + (long)nst->st_sock_time.tv_usec, nst->st_send_time.tv_sec, - (unsigned long)nst->st_send_time.tv_usec, + (long)nst->st_send_time.tv_usec, nst->st_status_time.tv_sec, - nst->st_status_time.tv_usec); + (long)nst->st_status_time.tv_usec); } spin_unlock(&o2net_debug_lock); @@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) return sc; /* unused, just needs to be null when done */ } -#define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec +#define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec static int sc_seq_show(struct seq_file *seq, void *v) { @@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) " remote node: %s\n" " page off: %zu\n" " handshake ok: %u\n" - " timer: %lu.%lu\n" - " data ready: %lu.%lu\n" - " advance start: %lu.%lu\n" - " advance stop: %lu.%lu\n" - " func start: %lu.%lu\n" - " func stop: %lu.%lu\n" + " timer: %lu.%ld\n" + " data ready: %lu.%ld\n" + " advance start: %lu.%ld\n" + " advance stop: %lu.%ld\n" + " func start: %lu.%ld\n" + " func stop: %lu.%ld\n" " func key: %u\n" " func type: %u\n", sc, diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); #ifdef CONFIG_DEBUG_FS -void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) +static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) { INIT_LIST_HEAD(&nst->st_net_debug_item); nst->st_task = task; @@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, nst->st_node = node; } -void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_sock_time); } -void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_send_time); } -void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { do_gettimeofday(&nst->st_status_time); } -void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, +static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, struct o2net_sock_container *sc) { nst->st_sc = sc; } -void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) { nst->st_id = msg_id; } + +#else /* CONFIG_DEBUG_FS */ + +static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, + u32 msgkey, struct task_struct *task, u8 node) +{ +} + +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +{ +} + +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ +} + +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) +{ +} + #endif /* CONFIG_DEBUG_FS */ static inline int o2net_reconnect_delay(void) diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h @@ -224,42 +224,10 @@ struct o2net_send_tracking { struct timeval st_send_time; struct timeval st_status_time; }; - -void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node); -void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); -void o2net_set_nst_send_time(struct o2net_send_tracking *nst); -void o2net_set_nst_status_time(struct o2net_send_tracking *nst); -void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc); -void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); - #else struct o2net_send_tracking { u32 dummy; }; - -static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) -{ -} -static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) -{ -} -static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) -{ -} -static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, - u32 msg_id) -{ -} #endif /* CONFIG_DEBUG_FS */ #endif /* O2CLUSTER_TCP_INTERNAL_H */ diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..9cce563fd627 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, di->i_size = cpu_to_le64(sb->s_blocksize); di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); - dir->i_blocks = ocfs2_inode_sector_count(dir); /* * This should never fail as our extent list is empty and all @@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, NULL); if (ret) { mlog_errno(ret); - goto out; + goto out_commit; } + /* + * Set i_blocks after the extent insert for the most up to + * date ip_clusters value. + */ + dir->i_blocks = ocfs2_inode_sector_count(dir); + ret = ocfs2_journal_dirty(handle, di_bh); if (ret) { mlog_errno(ret); @@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, len, 0, NULL); if (ret) { mlog_errno(ret); - goto out; + goto out_commit; } } diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..c47bc2a809c2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) { unsigned int node_num; int status, i; + u32 gen; struct buffer_head *bh = NULL; struct ocfs2_dinode *di; /* This is called with the super block cluster lock, so we * know that the slot map can't change underneath us. */ - spin_lock(&osb->osb_lock); for (i = 0; i < osb->max_slots; i++) { /* Read journal inode to get the recovery generation */ status = ocfs2_read_journal_inode(osb, i, &bh, NULL); @@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) goto bail; } di = (struct ocfs2_dinode *)bh->b_data; - osb->slot_recovery_generations[i] = - ocfs2_get_recovery_generation(di); + gen = ocfs2_get_recovery_generation(di); brelse(bh); bh = NULL; + spin_lock(&osb->osb_lock); + osb->slot_recovery_generations[i] = gen; + mlog(0, "Slot %u recovery generation is %u\n", i, osb->slot_recovery_generations[i]); - if (i == osb->slot_num) + if (i == osb->slot_num) { + spin_unlock(&osb->osb_lock); continue; + } status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); - if (status == -ENOENT) + if (status == -ENOENT) { + spin_unlock(&osb->osb_lock); continue; + } - if (__ocfs2_recovery_map_test(osb, node_num)) + if (__ocfs2_recovery_map_test(osb, node_num)) { + spin_unlock(&osb->osb_lock); continue; + } spin_unlock(&osb->osb_lock); /* Ok, we have a slot occupied by another node which @@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) mlog_errno(status); goto bail; } - - spin_lock(&osb->osb_lock); } - spin_unlock(&osb->osb_lock); status = 0; bail: diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..07f348b8d721 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, goto out; } - /* Ok, the stack is pinned */ - p->sp_count++; active_stack = p; - rc = 0; out: + /* If we found it, pin it */ + if (!rc) + active_stack->sp_count++; + spin_unlock(&ocfs2_stack_lock); return rc; } diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index 697663b01bae..e1c0ec0ae989 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -92,7 +92,7 @@ int omfs_allocate_block(struct super_block *sb, u64 block) struct buffer_head *bh; struct omfs_sb_info *sbi = OMFS_SB(sb); int bits_per_entry = 8 * sb->s_blocksize; - int map, bit; + unsigned int map, bit; int ret = 0; u64 tmp; @@ -176,7 +176,8 @@ int omfs_clear_range(struct super_block *sb, u64 block, int count) struct omfs_sb_info *sbi = OMFS_SB(sb); int bits_per_entry = 8 * sb->s_blocksize; u64 tmp; - int map, bit, ret; + unsigned int map, bit; + int ret; tmp = block; bit = do_div(tmp, bits_per_entry); diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 7e2499053e4d..834b2331f6b3 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -26,6 +26,13 @@ static int omfs_sync_file(struct file *file, struct dentry *dentry, return err ? -EIO : 0; } +static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) +{ + return (sbi->s_sys_blocksize - offset - + sizeof(struct omfs_extent)) / + sizeof(struct omfs_extent_entry) + 1; +} + void omfs_make_empty_table(struct buffer_head *bh, int offset) { struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; @@ -45,6 +52,7 @@ int omfs_shrink_inode(struct inode *inode) struct buffer_head *bh; u64 next, last; u32 extent_count; + u32 max_extents; int ret; /* traverse extent table, freeing each entry that is greater @@ -62,15 +70,18 @@ int omfs_shrink_inode(struct inode *inode) goto out; oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); for (;;) { - if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) { - brelse(bh); - goto out; - } + if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) + goto out_brelse; extent_count = be32_to_cpu(oe->e_extent_count); + + if (extent_count > max_extents) + goto out_brelse; + last = next; next = be64_to_cpu(oe->e_next); entry = &oe->e_entry; @@ -98,10 +109,14 @@ int omfs_shrink_inode(struct inode *inode) if (!bh) goto out; oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); } ret = 0; out: return ret; +out_brelse: + brelse(bh); + return ret; } static void omfs_truncate(struct inode *inode) @@ -154,9 +169,7 @@ static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, goto out; } } - max_count = (sbi->s_sys_blocksize - OMFS_EXTENT_START - - sizeof(struct omfs_extent)) / - sizeof(struct omfs_extent_entry) + 1; + max_count = omfs_max_extents(sbi, OMFS_EXTENT_START); /* TODO: add a continuation block here */ if (be32_to_cpu(oe->e_extent_count) > max_count-1) @@ -225,6 +238,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, sector_t next, offset; int ret; u64 new_block; + u32 max_extents; int extent_count; struct omfs_extent *oe; struct omfs_extent_entry *entry; @@ -238,6 +252,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, goto out; oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); next = inode->i_ino; for (;;) { @@ -249,6 +264,9 @@ static int omfs_get_block(struct inode *inode, sector_t block, next = be64_to_cpu(oe->e_next); entry = &oe->e_entry; + if (extent_count > max_extents) + goto out_brelse; + offset = find_block(inode, entry, block, extent_count, &remain); if (offset > 0) { ret = 0; @@ -266,6 +284,7 @@ static int omfs_get_block(struct inode *inode, sector_t block, if (!bh) goto out; oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); } if (create) { ret = omfs_grow_extent(inode, oe, &new_block); diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d865f5535436..d29047b1b9b0 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -232,8 +232,7 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino) inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); inode->i_op = &omfs_dir_inops; inode->i_fop = &omfs_dir_operations; - inode->i_size = be32_to_cpu(oi->i_head.h_body_size) + - sizeof(struct omfs_header); + inode->i_size = sbi->s_sys_blocksize; inc_nlink(inode); break; case OMFS_FILE: @@ -492,7 +491,8 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) { printk(KERN_ERR "omfs: block count discrepancy between " "super and root blocks (%llx, %llx)\n", - sbi->s_num_blocks, be64_to_cpu(omfs_rb->r_num_blocks)); + (unsigned long long)sbi->s_num_blocks, + (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks)); goto out_brelse_bh2; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 01ed610f9b87..a28840b11b89 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2423,10 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", - acct.rchar, acct.wchar, - acct.syscr, acct.syscw, - acct.read_bytes, acct.write_bytes, - acct.cancelled_write_bytes); + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); } static int proc_tid_io_accounting(struct task_struct *task, char *buffer) diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..bca0f81eb687 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -330,6 +330,7 @@ retry: spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, i); spin_unlock(&proc_inum_lock); + return 0; } return PROC_DYNAMIC_FIRST + i; } diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 79ecd281d2cb..3f87d2632947 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } seq_printf(m, - "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7546a918f790..73d1891ee625 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v) ino = inode->i_ino; } - seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); /* diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset if (buf->result) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->result = -EOVERFLOW; return -EOVERFLOW; + } buf->result++; dirent = buf->dirent; if (!access_ok(VERIFY_WRITE, dirent, @@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, if (reclen > buf->count) return -EINVAL; d_ino = ino; - if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) + if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { + buf->error = -EOVERFLOW; return -EOVERFLOW; + } dirent = buf->previous; if (dirent) { if (__put_user(offset, &dirent->d_off)) diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 192269698a8a..5699171212ae 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page, if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); } else { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c8f60ee183b5..c21df71943a6 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s, static void release_buffer_page(struct buffer_head *bh) { struct page *page = bh->b_page; - if (!page->mapping && !TestSetPageLocked(page)) { + if (!page->mapping && trylock_page(page)) { page_cache_get(page); put_bh(bh); if (!page->mapping) @@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock, jh = JH_ENTRY(list->next); bh = jh->bh; get_bh(bh); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!buffer_dirty(bh)) { list_move(&jh->list, &tmp); goto loop_next; @@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, { PROC_INFO_INC(p_s_sb, journal.prepare); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!wait) return 0; lock_buffer(bh); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 282a13596c70..d318c7e663fa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -27,7 +27,6 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/quotaops.h> struct file_system_type reiserfs_fs_type; diff --git a/fs/seq_file.c b/fs/seq_file.c index 3f54dbd6c49b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Done; } /* we need at least one record in buffer */ + pos = m->index; + p = m->op->start(m, &pos); while (1) { - pos = m->index; - p = m->op->start(m, &pos); err = PTR_ERR(p); if (!p || IS_ERR(p)) break; @@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) break; if (unlikely(err)) m->count = 0; + if (unlikely(!m->count)) { + p = m->op->next(m, p, &pos); + m->index = pos; + continue; + } if (m->count < m->size) goto Fill; m->op->stop(m, p); @@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) goto Enomem; m->count = 0; m->version = 0; + pos = m->index; + p = m->op->start(m, &pos); } m->op->stop(m, p); m->count = 0; @@ -443,6 +450,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) return -1; } +int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) +{ + size_t len = bitmap_scnprintf_len(nr_bits); + + if (m->count + len < m->size) { + bitmap_scnprintf(m->buf + m->count, m->size - m->count, + bits, nr_bits); + m->count += len; + return 0; + } + m->count = m->size; + return -1; +} + static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/fs/splice.c b/fs/splice.c index b30311ba8af6..1bbc6f4bb09c 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * for an in-flight io page */ if (flags & SPLICE_F_NONBLOCK) { - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { error = -EAGAIN; break; } diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index d81fb9ed2b8e..154098157473 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - /* And make sure we have twice the index size of space reserved */ - idx_size <<= 1; + /* And make sure we have thrice the index size of space reserved */ + idx_size = idx_size + (idx_size << 1); /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' @@ -388,11 +388,11 @@ static int can_use_rp(struct ubifs_info *c) * This function makes sure UBIFS has enough free eraseblocks for index growth * and data. * - * When budgeting index space, UBIFS reserves twice as more LEBs as the index + * When budgeting index space, UBIFS reserves thrice as many LEBs as the index * would take if it was consolidated and written to the flash. This guarantees * that the "in-the-gaps" commit method always succeeds and UBIFS will always * be able to commit dirty index. So this function basically adds amount of - * budgeted index space to the size of the current index, multiplies this by 2, + * budgeted index space to the size of the current index, multiplies this by 3, * and makes sure this does not exceed the amount of free eraseblocks. * * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: @@ -543,8 +543,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) int err, idx_growth, data_growth, dd_growth; struct retries_info ri; + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ubifs_assert(req->dirtied_ino <= 4); ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); data_growth = calc_data_growth(c, req); dd_growth = calc_dd_growth(c, req); @@ -618,8 +626,16 @@ again: */ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) { + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ubifs_assert(req->dirtied_ino <= 4); ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); if (!req->recalculate) { ubifs_assert(req->idx_growth >= 0); ubifs_assert(req->data_growth >= 0); @@ -647,7 +663,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) ubifs_assert(c->budg_idx_growth >= 0); ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->budg_dd_growth >= 0); ubifs_assert(c->min_idx_lebs < c->main_lebs); + ubifs_assert(!(c->budg_idx_growth & 7)); + ubifs_assert(!(c->budg_data_growth & 7)); + ubifs_assert(!(c->budg_dd_growth & 7)); spin_unlock(&c->space_lock); } @@ -686,9 +706,10 @@ void ubifs_convert_page_budget(struct ubifs_info *c) void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_inode *ui) { - struct ubifs_budget_req req = {.dd_growth = c->inode_budget, - .dirtied_ino_d = ui->data_len}; + struct ubifs_budget_req req; + memset(&req, 0, sizeof(struct ubifs_budget_req)); + req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 3b516316c9b3..0a6aa2cc78f0 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c) goto out_up; } + c->cmt_no += 1; err = ubifs_gc_start_commit(c); if (err) goto out_up; @@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c) goto out; mutex_lock(&c->mst_mutex); - c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); + c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); c->mst_node->root_offs = cpu_to_le32(zroot.offs); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4e3aaeba4eca..b9cb77473758 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) void dbg_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", - lst->empty_lebs, lst->idx_lebs); + printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " + "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, lst->total_dirty); @@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c) struct ubifs_gced_idx_leb *idx_gc; spin_lock(&dbg_lock); - printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " - "budg_dd_growth %lld, budg_idx_growth %lld\n", + printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " + "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, @@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_DEBUG "Dumping LEB properties\n"); + printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); @@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) if (dbg_failure_mode) return; - printk(KERN_DEBUG "Dumping LEB %d\n", lnum); + printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); if (IS_ERR(sleb)) { @@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", - cat, heap->cnt); + printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", + current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; @@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_DEBUG "Dumping pnode:\n"); + printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", @@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c) int level; printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "Dumping the TNC tree\n"); + printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; printk(KERN_DEBUG "== Level %d ==\n", level); @@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, int offset, int len, int dtype) { - int err; + int err, failing; if (in_failure_mode(desc)) return -EIO; - if (do_fail(desc, lnum, 1)) + failing = do_fail(desc, lnum, 1); + if (failing) cut_data(buf, len); err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); if (err) return err; - if (in_failure_mode(desc)) + if (failing) return -EIO; return 0; } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 3c4f1e93c9e0..50315fc57185 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -27,7 +27,7 @@ #define UBIFS_DBG(op) op -#define ubifs_assert(expr) do { \ +#define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ @@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key); /* - * DBGKEY macros require dbg_lock to be held, which it is in the dbg message + * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message * macros. */ #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) /* General messages */ -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) +#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) /* Additional journal messages */ -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) /* Additional TNC messages */ -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) /* Additional lprops messages */ -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) /* Additional LEB find messages */ -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) /* Additional mount messages */ -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) /* Additional I/O messages */ -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) /* Additional commit messages */ -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) /* Additional budgeting messages */ -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) /* Additional log messages */ -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) /* Additional gc messages */ -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) /* Additional scan messages */ -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) /* Additional recovery messages */ -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) /* * Debugging message type flags (must match msg_type_names in debug.c). @@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *priv); typedef int (*dbg_znode_callback)(struct ubifs_info *c, struct ubifs_znode *znode, void *priv); - int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); /* Checking functions */ int dbg_check_lprops(struct ubifs_info *c); - int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); - int dbg_check_cats(struct ubifs_info *c); - int dbg_check_ltab(struct ubifs_info *c); - int dbg_check_synced_i_size(struct inode *inode); - int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); - int dbg_check_tnc(struct ubifs_info *c, int extra); - int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); - int dbg_check_filesystem(struct ubifs_info *c); - void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int add_pos); - int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); @@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #else /* !CONFIG_UBIFS_FS_DEBUG */ #define UBIFS_DBG(op) -#define ubifs_assert(expr) ({}) -#define ubifs_assert_cmt_locked(c) + +/* Use "if (0)" to make compiler check arguments even if debugging is off */ +#define ubifs_assert(expr) do { \ + if (0 && (expr)) \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + if (0) \ + ubifs_err(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__); \ +} while (0) + #define dbg_dump_stack() -#define dbg_err(fmt, ...) ({}) -#define dbg_msg(fmt, ...) ({}) -#define dbg_key(c, key, fmt, ...) ({}) - -#define dbg_gen(fmt, ...) ({}) -#define dbg_jnl(fmt, ...) ({}) -#define dbg_tnc(fmt, ...) ({}) -#define dbg_lp(fmt, ...) ({}) -#define dbg_find(fmt, ...) ({}) -#define dbg_mnt(fmt, ...) ({}) -#define dbg_io(fmt, ...) ({}) -#define dbg_cmt(fmt, ...) ({}) -#define dbg_budg(fmt, ...) ({}) -#define dbg_log(fmt, ...) ({}) -#define dbg_gc(fmt, ...) ({}) -#define dbg_scan(fmt, ...) ({}) -#define dbg_rcvry(fmt, ...) ({}) - -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) +#define ubifs_assert_cmt_locked(c) -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 #define dbg_check_old_index(c, zroot) 0 - #define dbg_check_cats(c) 0 - #define dbg_check_ltab(c) 0 - #define dbg_check_synced_i_size(inode) 0 - #define dbg_check_dir_size(c, dir) 0 - #define dbg_check_tnc(c, x) 0 - #define dbg_check_idx_size(c, idx_size) 0 - #define dbg_check_filesystem(c) 0 - #define dbg_check_heap(c, heap, cat, add_pos) ({}) - #define dbg_check_lprops(c) 0 #define dbg_check_lpt_nodes(c, cnode, row, col) 0 - #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 - #define dbg_failure_mode 0 #define dbg_failure_mode_registration(c) ({}) #define dbg_failure_mode_deregistration(c) ({}) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index e90374be7d3b..5c96f1fb7016 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, } inode->i_ino = ++c->highest_inum; - inode->i_generation = ++c->vfs_gen; /* * The creation sequence number remains with this inode for its * lifetime. All nodes for this inode have a greater sequence number, @@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); if (err) { - /* - * Do not hash the direntry if parent 'i_nlink' is zero, because - * this has side-effects - '->delete_inode()' call will not be - * called for the parent orphan inode, because 'd_count' of its - * direntry will stay 1 (it'll be negative direntry I guess) - * and prevent 'iput_final()' until the dentry is destroyed due - * to unmount or memory pressure. - */ - if (err == -ENOENT && dir->i_nlink != 0) { + if (err == -ENOENT) { dbg_gen("not found"); goto done; } @@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct ubifs_inode *dir_ui = ubifs_inode(dir); int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; /* * Budget request settings: new direntry, changing the target inode, @@ -727,8 +718,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .dirtied_ino_d = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -789,7 +779,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, int sz_change = CALC_DENT_SIZE(dentry->d_name.len); int err, devlen = 0; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = devlen, .dirtied_ino = 1 }; + .new_ino_d = ALIGN(devlen, 8), + .dirtied_ino = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -863,7 +854,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, int err, len = strlen(symname); int sz_change = CALC_DENT_SIZE(dentry->d_name.len); struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = len, .dirtied_ino = 1 }; + .new_ino_d = ALIGN(len, 8), + .dirtied_ino = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -1012,7 +1004,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, - .dirtied_ino_d = old_inode_ui->data_len }; + .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; /* diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 8565e586e533..4071d1cae29f 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -890,7 +890,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, loff_t new_size = attr->ia_size; struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) @@ -941,7 +941,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; - dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); + dbg_gen("ino %lu, mode %#x, ia_valid %#x", + inode->i_ino, inode->i_mode, attr->ia_valid); err = inode_change_ok(inode, attr); if (err) return err; @@ -1051,7 +1052,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) if (mctime_update_needed(inode, &now)) { int err, release; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) @@ -1270,6 +1271,7 @@ struct file_operations ubifs_file_operations = { .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 10394c548367..adee7b5ddeab 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -290,9 +290,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, idx_lp = idx_heap->arr[0]; sum = idx_lp->free + idx_lp->dirty; /* - * Since we reserve twice as more space for the index than it + * Since we reserve thrice as much space for the index than it * actually takes, it does not make sense to pick indexing LEBs - * with less than half LEB of dirty space. + * with less than, say, half LEB of dirty space. May be half is + * not the optimal boundary - this should be tested and + * checked. This boundary should determine how much we use + * in-the-gaps to consolidate the index comparing to how much + * we use garbage collector to consolidate it. The "half" + * criteria just feels to be fine. */ if (sum < min_space || sum < c->half_leb_size) idx_lp = NULL; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3374f91b6709..054363f2b207 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -54,6 +54,20 @@ #include "ubifs.h" /** + * ubifs_ro_mode - switch UBIFS to read read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + */ +void ubifs_ro_mode(struct ubifs_info *c, int err) +{ + if (!c->ro_media) { + c->ro_media = 1; + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +} + +/** * ubifs_check_node - check node. * @c: UBIFS file-system description object * @buf: node to check diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 283155abe5f5..22993f867d19 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -447,13 +447,11 @@ static int get_dent_type(int mode) * @ino: buffer in which to pack inode node * @inode: inode to pack * @last: indicates the last node of the group - * @last_reference: non-zero if this is a deletion inode */ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, - const struct inode *inode, int last, - int last_reference) + const struct inode *inode, int last) { - int data_len = 0; + int data_len = 0, last_reference = !inode->i_nlink; struct ubifs_inode *ui = ubifs_inode(inode); ino->ch.node_type = UBIFS_INO_NODE; @@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, ubifs_prep_grp_node(c, dent, dlen, 0); ino = (void *)dent + aligned_dlen; - pack_inode(c, ino, inode, 0, last_reference); + pack_inode(c, ino, inode, 0); ino = (void *)ino + aligned_ilen; - pack_inode(c, ino, dir, 1, 0); + pack_inode(c, ino, dir, 1); if (last_reference) { err = ubifs_add_orphan(c, inode->i_ino); @@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, release_head(c, BASEHD); goto out_finish; } + ui->del_cmtno = c->cmt_no; } err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); @@ -750,30 +749,25 @@ out_free: * ubifs_jnl_write_inode - flush inode to the journal. * @c: UBIFS file-system description object * @inode: inode to flush - * @deletion: inode has been deleted * * This function writes inode @inode to the journal. If the inode is * synchronous, it also synchronizes the write-buffer. Returns zero in case of * success and a negative error code in case of failure. */ -int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, - int deletion) +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) { - int err, len, lnum, offs, sync = 0; + int err, lnum, offs; struct ubifs_ino_node *ino; struct ubifs_inode *ui = ubifs_inode(inode); + int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink; - dbg_jnl("ino %lu%s", inode->i_ino, - deletion ? " (last reference)" : ""); - if (deletion) - ubifs_assert(inode->i_nlink == 0); + dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); - len = UBIFS_INO_NODE_SZ; /* * If the inode is being deleted, do not write the attached data. No * need to synchronize the write-buffer either. */ - if (!deletion) { + if (!last_reference) { len += ui->data_len; sync = IS_SYNC(inode); } @@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, inode, 1, deletion); + pack_inode(c, ino, inode, 1); err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); if (err) goto out_release; @@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, inode->i_ino); release_head(c, BASEHD); - if (deletion) { + if (last_reference) { err = ubifs_tnc_remove_ino(c, inode->i_ino); if (err) goto out_ro; @@ -828,6 +822,65 @@ out_free: } /** + * ubifs_jnl_delete_inode - delete an inode. + * @c: UBIFS file-system description object + * @inode: inode to delete + * + * This function deletes inode @inode which includes removing it from orphans, + * deleting it from TNC and, in some cases, writing a deletion inode to the + * journal. + * + * When regular file inodes are unlinked or a directory inode is removed, the + * 'ubifs_jnl_update()' function writes a corresponding deletion inode and + * direntry to the media, and adds the inode to orphans. After this, when the + * last reference to this inode has been dropped, this function is called. In + * general, it has to write one more deletion inode to the media, because if + * a commit happened between 'ubifs_jnl_update()' and + * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal + * anymore, and in fact it might not be on the flash anymore, because it might + * have been garbage-collected already. And for optimization reasons UBIFS does + * not read the orphan area if it has been unmounted cleanly, so it would have + * no indication in the journal that there is a deleted inode which has to be + * removed from TNC. + * + * However, if there was no commit between 'ubifs_jnl_update()' and + * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion + * inode to the media for the second time. And this is quite a typical case. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(inode->i_nlink == 0); + + if (ui->del_cmtno != c->cmt_no) + /* A commit happened for sure */ + return ubifs_jnl_write_inode(c, inode); + + down_read(&c->commit_sem); + /* + * Check commit number again, because the first test has been done + * without @c->commit_sem, so a commit might have happened. + */ + if (ui->del_cmtno != c->cmt_no) { + up_read(&c->commit_sem); + return ubifs_jnl_write_inode(c, inode); + } + + err = ubifs_tnc_remove_ino(c, inode->i_ino); + if (err) + ubifs_ro_mode(c, err); + else + ubifs_delete_orphan(c, inode->i_ino); + up_read(&c->commit_sem); + return err; +} + +/** * ubifs_jnl_rename - rename a directory entry. * @c: UBIFS file-system description object * @old_dir: parent inode of directory entry to rename @@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, p = (void *)dent2 + aligned_dlen2; if (new_inode) { - pack_inode(c, p, new_inode, 0, last_reference); + pack_inode(c, p, new_inode, 0); p += ALIGN(ilen, 8); } if (!move) - pack_inode(c, p, old_dir, 1, 0); + pack_inode(c, p, old_dir, 1); else { - pack_inode(c, p, old_dir, 0, 0); + pack_inode(c, p, old_dir, 0); p += ALIGN(plen, 8); - pack_inode(c, p, new_dir, 1, 0); + pack_inode(c, p, new_dir, 1); } if (last_reference) { @@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, release_head(c, BASEHD); goto out_finish; } + new_ui->del_cmtno = c->cmt_no; } err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); @@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, inode, 0, 0); + pack_inode(c, ino, inode, 0); ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); if (dlen) ubifs_prep_grp_node(c, dn, dlen, 1); @@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, ubifs_prep_grp_node(c, xent, xlen, 0); ino = (void *)xent + aligned_xlen; - pack_inode(c, ino, inode, 0, 1); + pack_inode(c, ino, inode, 0); ino = (void *)ino + UBIFS_INO_NODE_SZ; - pack_inode(c, ino, host, 1, 0); + pack_inode(c, ino, host, 1); err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); if (!sync && !err) @@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, const struct inode *host) { int err, len1, len2, aligned_len, aligned_len1, lnum, offs; - struct ubifs_inode *host_ui = ubifs_inode(inode); + struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_ino_node *ino; union ubifs_key key; int sync = IS_DIRSYNC(host); @@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, host, 0, 0); - pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); + pack_inode(c, ino, host, 0); + pack_inode(c, (void *)ino + aligned_len1, inode, 1); err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); if (!sync && !err) { diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 36857b9ed59e..3e0aa7367556 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: + if (err != -EAGAIN) + ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) return -ENOMEM; cs->ch.node_type = UBIFS_CS_NODE; - cs->cmt_no = cpu_to_le64(c->cmt_no + 1); + cs->cmt_no = cpu_to_le64(c->cmt_no); ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); /* diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4beccfc256d2..87dabf9fe742 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) } /** - * ubifs_ro_mode - switch UBIFS to read read-only mode. - * @c: UBIFS file-system description object - * @err: error code which is the reason of switching to R/O mode - */ -static inline void ubifs_ro_mode(struct ubifs_info *c, int err) -{ - if (!c->ro_media) { - c->ro_media = 1; - ubifs_warn("switched to read-only mode, error %d", err); - dbg_dump_stack(); - } -} - -/** * ubifs_compr_present - check if compressor was compiled in. * @compr_type: compressor type to check * @@ -322,7 +308,7 @@ static inline long long ubifs_reported_space(const struct ubifs_info *c, { int divisor, factor; - divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); + divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; do_div(free, divisor); diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 3afeb9242c6a..02d3462f4d3e 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic) c->cmt_orphans -= cnt; spin_unlock(&c->orphan_lock); if (c->cmt_orphans) - orph->cmt_no = cpu_to_le64(c->cmt_no + 1); + orph->cmt_no = cpu_to_le64(c->cmt_no); else /* Mark the last node of the commit */ - orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); + orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); ubifs_assert(c->ohead_offs + len <= c->leb_size); ubifs_assert(c->ohead_lnum >= c->orph_first); ubifs_assert(c->ohead_lnum <= c->orph_last); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index ca1e2d4e03cc..f71e6b8822c4 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -30,7 +30,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/ctype.h> -#include <linux/random.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/seq_file.h> @@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) if (err) goto out_invalid; - /* Disable readahead */ + /* Disable read-ahead */ inode->i_mapping->backing_dev_info = &c->bdi; switch (inode->i_mode & S_IFMT) { @@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode) */ static int ubifs_write_inode(struct inode *inode, int wait) { - int err; + int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); @@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait) return 0; } - dbg_gen("inode %lu", inode->i_ino); - err = ubifs_jnl_write_inode(c, inode, 0); - if (err) - ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + /* + * As an optimization, do not write orphan inodes to the media just + * because this is not needed. + */ + dbg_gen("inode %lu, mode %#x, nlink %u", + inode->i_ino, (int)inode->i_mode, inode->i_nlink); + if (inode->i_nlink) { + err = ubifs_jnl_write_inode(c, inode); + if (err) + ubifs_err("can't write inode %lu, error %d", + inode->i_ino, err); + } ui->dirty = 0; mutex_unlock(&ui->ui_mutex); @@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode) { int err; struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); - if (ubifs_inode(inode)->xattr) + if (ui->xattr) /* * Extended attribute inode deletions are fully handled in * 'ubifs_removexattr()'. These inodes are special and have @@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode) */ goto out; - dbg_gen("inode %lu", inode->i_ino); + dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ubifs_assert(!atomic_read(&inode->i_count)); ubifs_assert(inode->i_nlink == 0); @@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) goto out; - ubifs_inode(inode)->ui_size = inode->i_size = 0; - err = ubifs_jnl_write_inode(c, inode, 1); + ui->ui_size = inode->i_size = 0; + err = ubifs_jnl_delete_inode(c, inode); if (err) /* * Worst case we have a lost orphan inode wasting space, so a - * simple error message is ok here. + * simple error message is OK here. */ - ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + ubifs_err("can't delete inode %lu, error %d", + inode->i_ino, err); + out: + if (ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); clear_inode(inode); } @@ -1122,8 +1134,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, - c->vi.vol_id); + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); if (mounted_read_only) ubifs_msg("mounted read-only"); x = (long long)c->main_lebs * c->leb_size; @@ -1469,6 +1481,7 @@ static void ubifs_put_super(struct super_block *sb) */ ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); ubifs_assert(c->budg_idx_growth == 0); + ubifs_assert(c->budg_dd_growth == 0); ubifs_assert(c->budg_data_growth == 0); /* @@ -1657,7 +1670,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&c->orph_new); c->highest_inum = UBIFS_FIRST_INO; - get_random_bytes(&c->vfs_gen, sizeof(int)); c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; ubi_get_volume_info(ubi, &c->vi); @@ -1671,10 +1683,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) } /* - * UBIFS provids 'backing_dev_info' in order to disable readahead. For + * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For * UBIFS, I/O is not deferred, it is done immediately in readpage, * which means the user would have to wait not just for their own I/O - * but the readahead I/O as well i.e. completely pointless. + * but the read-ahead I/O as well i.e. completely pointless. * * Read-ahead will be disabled because @c->bdi.ra_pages is 0. */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 8117e65ba2e9..8ac76b1c2d55 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) written = layout_leb_in_gaps(c, p); if (written < 0) { err = written; - if (err == -ENOSPC) { - if (!dbg_force_in_the_gaps_enabled) { - /* - * Do not print scary warnings if the - * debugging option which forces - * in-the-gaps is enabled. - */ - ubifs_err("out of space"); - spin_lock(&c->space_lock); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); - dbg_dump_lprops(c); - } - /* Try to commit anyway */ - err = 0; - break; + if (err != -ENOSPC) { + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return err; } - kfree(c->gap_lebs); - c->gap_lebs = NULL; - return err; + if (!dbg_force_in_the_gaps_enabled) { + /* + * Do not print scary warnings if the debugging + * option which forces in-the-gaps is enabled. + */ + ubifs_err("out of space"); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ + err = 0; + break; } p++; cnt -= written; diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 0cc7da9bed47..bd2121f3426e 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -228,10 +228,10 @@ enum { /* Minimum number of orphan area logical eraseblocks */ #define UBIFS_MIN_ORPH_LEBS 1 /* - * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 + * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 * for GC, 1 for deletions, and at least 1 for committed data). */ -#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) /* Minimum number of logical eraseblocks */ #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e4f89f271827..d7f706f7a302 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -20,8 +20,6 @@ * Adrian Hunter */ -/* Implementation version 0.7 */ - #ifndef __UBIFS_H__ #define __UBIFS_H__ @@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb { * struct ubifs_inode - UBIFS in-memory inode description. * @vfs_inode: VFS inode description object * @creat_sqnum: sequence number at time of creation + * @del_cmtno: commit number corresponding to the time the inode was deleted, + * protected by @c->commit_sem; * @xattr_size: summarized size of all extended attributes in bytes * @xattr_cnt: count of extended attributes this inode has * @xattr_names: sum of lengths of all extended attribute names belonging to @@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb { struct ubifs_inode { struct inode vfs_inode; unsigned long long creat_sqnum; + unsigned long long del_cmtno; unsigned int xattr_size; unsigned int xattr_cnt; unsigned int xattr_names; @@ -779,7 +780,7 @@ struct ubifs_compressor { /** * struct ubifs_budget_req - budget requirements of an operation. * - * @fast: non-zero if the budgeting should try to aquire budget quickly and + * @fast: non-zero if the budgeting should try to acquire budget quickly and * should not try to call write-back * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields * have to be re-calculated @@ -805,21 +806,31 @@ struct ubifs_compressor { * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made * dirty by the re-name operation. + * + * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to + * make sure the amount of inode data which contribute to @new_ino_d and + * @dirtied_ino_d fields are aligned. */ struct ubifs_budget_req { unsigned int fast:1; unsigned int recalculate:1; +#ifndef UBIFS_DEBUG unsigned int new_page:1; unsigned int dirtied_page:1; unsigned int new_dent:1; unsigned int mod_dent:1; unsigned int new_ino:1; unsigned int new_ino_d:13; -#ifndef UBIFS_DEBUG unsigned int dirtied_ino:4; unsigned int dirtied_ino_d:15; #else /* Not bit-fields to check for overflows */ + unsigned int new_page; + unsigned int dirtied_page; + unsigned int new_dent; + unsigned int mod_dent; + unsigned int new_ino; + unsigned int new_ino_d; unsigned int dirtied_ino; unsigned int dirtied_ino_d; #endif @@ -860,13 +871,13 @@ struct ubifs_mount_opts { * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). * @vfs_sb: VFS @struct super_block object - * @bdi: backing device info object to make VFS happy and disable readahead + * @bdi: backing device info object to make VFS happy and disable read-ahead * * @highest_inum: highest used inode number - * @vfs_gen: VFS inode generation counter * @max_sqnum: current global sequence number - * @cmt_no: commit number (last successfully completed commit) - * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters + * @cmt_no: commit number of the last successfully completed commit, protected + * by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters * @fmt_version: UBIFS on-flash format version * @uuid: UUID from super block * @@ -1103,7 +1114,6 @@ struct ubifs_info { struct backing_dev_info bdi; ino_t highest_inum; - unsigned int vfs_gen; unsigned long long max_sqnum; unsigned long long cmt_no; spinlock_t cnt_lock; @@ -1346,6 +1356,7 @@ extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ +void ubifs_ro_mode(struct ubifs_info *c, int err); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1399,8 +1410,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, int deletion, int xent); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); -int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, - int last_reference); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 1388a078e1a9..649bec78b645 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -61,7 +61,7 @@ /* * Limit the number of extended attributes per inode so that the total size - * (xattr_size) is guaranteeded to fit in an 'unsigned int'. + * (@xattr_size) is guaranteeded to fit in an 'unsigned int'. */ #define MAX_XATTRS_PER_INODE 65535 @@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, struct inode *inode; struct ubifs_inode *ui, *host_ui = ubifs_inode(host); struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = size, .dirtied_ino = 1, - .dirtied_ino_d = host_ui->data_len}; + .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) return -ENOSPC; /* * Linux limits the maximum size of the extended attribute names list - * to %XATTR_LIST_MAX. This means we should not allow creating more* + * to %XATTR_LIST_MAX. This means we should not allow creating more * extended attributes if the name list becomes larger. This limitation * is artificial for UBIFS, though. */ @@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, goto out_budg; } - mutex_lock(&host_ui->ui_mutex); /* Re-define all operations to be "nothing" */ inode->i_mapping->a_ops = &none_address_operations; inode->i_op = &none_inode_operations; @@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data = kmalloc(size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; - goto out_unlock; + goto out_free; } - memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + + mutex_lock(&host_ui->ui_mutex); host->i_ctime = ubifs_current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(nm->len); host_ui->xattr_size += CALC_XATTR_BYTES(size); host_ui->xattr_names += nm->len; - /* - * We do not use i_size_write() because nobody can race with us as we - * are holding host @host->i_mutex - every xattr operation for this - * inode is serialized by it. - */ - inode->i_size = ui->ui_size = size; - ui->data_len = size; err = ubifs_jnl_update(c, host, nm, inode, 0, 1); if (err) goto out_cancel; @@ -172,8 +167,8 @@ out_cancel: host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); host_ui->xattr_size -= CALC_XATTR_BYTES(size); -out_unlock: mutex_unlock(&host_ui->ui_mutex); +out_free: make_bad_inode(inode); iput(inode); out_budg: @@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_budget_req req = { .dirtied_ino = 2, - .dirtied_ino_d = size + host_ui->data_len }; + .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; ubifs_assert(ui->data_len == inode->i_size); err = ubifs_budget_space(c, &req); if (err) return err; - mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); - host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); - host_ui->xattr_size += CALC_XATTR_BYTES(size); - kfree(ui->data); ui->data = kmalloc(size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; - goto out_unlock; + goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = size; + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + /* * It is important to write the host inode after the xattr inode * because if the host inode gets synchronized (via 'fsync()'), then @@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); - make_bad_inode(inode); -out_unlock: mutex_unlock(&host_ui->ui_mutex); + make_bad_inode(inode); +out_free: ubifs_release_budget(c, &req); return err; } @@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) return -ERANGE; @@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, if (!xent) return -ENOMEM; - mutex_lock(&host->i_mutex); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { @@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, out_iput: iput(inode); out_unlock: - mutex_unlock(&host->i_mutex); kfree(xent); return err; } @@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) return -ERANGE; lowest_xent_key(c, &key, host->i_ino); - - mutex_lock(&host->i_mutex); while (1) { int type; @@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) pxent = xent; key_read(c, &xent->key, &key); } - mutex_unlock(&host->i_mutex); kfree(pxent); if (err != -ENOENT) { @@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, int err; struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); - struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, - .dirtied_ino_d = host_ui->data_len }; + struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1, + .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; ubifs_assert(ui->data_len == inode->i_size); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3e30e40aa24d..3141969b456d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - const struct match_token *tp = tokens; + struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h deleted file mode 100644 index 3abe7e9ceb33..000000000000 --- a/fs/xfs/linux-2.6/sema.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_SEMA_H__ -#define __XFS_SUPPORT_SEMA_H__ - -#include <linux/time.h> -#include <linux/wait.h> -#include <linux/semaphore.h> -#include <asm/atomic.h> - -/* - * sema_t structure just maps to struct semaphore in Linux kernel. - */ - -typedef struct semaphore sema_t; - -#define initnsema(sp, val, name) sema_init(sp, val) -#define psema(sp, b) down(sp) -#define vsema(sp) up(sp) -#define freesema(sema) do { } while (0) - -static inline int issemalocked(sema_t *sp) -{ - return down_trylock(sp) || (up(sp), 0); -} - -/* - * Map cpsema (try to get the sema) to down_trylock. We need to switch - * the return values since cpsema returns 1 (acquired) 0 (failed) and - * down_trylock returns the reverse 0 (acquired) 1 (failed). - */ -static inline int cpsema(sema_t *sp) -{ - return down_trylock(sp) ? 0 : 1; -} - -#endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 0b211cba1909..f42f80a3b1fa 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -73,7 +73,6 @@ xfs_page_trace( unsigned long pgoff) { xfs_inode_t *ip; - bhv_vnode_t *vp = vn_from_inode(inode); loff_t isize = i_size_read(inode); loff_t offset = page_offset(page); int delalloc = -1, unmapped = -1, unwritten = -1; @@ -81,7 +80,7 @@ xfs_page_trace( if (page_has_buffers(page)) xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - ip = xfs_vtoi(vp); + ip = XFS_I(inode); if (!ip->i_rwtrace) return; @@ -675,7 +674,7 @@ xfs_probe_cluster( } else pg_offset = PAGE_CACHE_SIZE; - if (page->index == tindex && !TestSetPageLocked(page)) { + if (page->index == tindex && trylock_page(page)) { pg_len = xfs_probe_page(page, pg_offset, mapped); unlock_page(page); } @@ -759,7 +758,7 @@ xfs_convert_page( if (page->index != tindex) goto fail; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto fail; if (PageWriteback(page)) goto fail_unlock_page; @@ -1104,7 +1103,7 @@ xfs_page_state_convert( * that we are writing into for the first time. */ type = IOMAP_NEW; - if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) all_bh = 1; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9cc8f0213095..986061ae1b9b 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -58,7 +58,7 @@ xfs_buf_trace( bp, id, (void *)(unsigned long)bp->b_flags, (void *)(unsigned long)bp->b_hold.counter, - (void *)(unsigned long)bp->b_sema.count.counter, + (void *)(unsigned long)bp->b_sema.count, (void *)current, data, ra, (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), @@ -253,7 +253,7 @@ _xfs_buf_initialize( memset(bp, 0, sizeof(xfs_buf_t)); atomic_set(&bp->b_hold, 1); - init_MUTEX_LOCKED(&bp->b_iodonesema); + init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_hash_list); init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ @@ -838,6 +838,7 @@ xfs_buf_rele( return; } + ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { if (bp->b_relse) { atomic_inc(&bp->b_hold); @@ -851,11 +852,6 @@ xfs_buf_rele( spin_unlock(&hash->bh_lock); xfs_buf_free(bp); } - } else { - /* - * Catch reference count leaks - */ - ASSERT(atomic_read(&bp->b_hold) >= 0); } } @@ -1037,7 +1033,7 @@ xfs_buf_ioend( xfs_buf_iodone_work(&bp->b_iodone_work); } } else { - up(&bp->b_iodonesema); + complete(&bp->b_iowait); } } @@ -1275,7 +1271,7 @@ xfs_buf_iowait( XB_TRACE(bp, "iowait", 0); if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); - down(&bp->b_iodonesema); + wait_for_completion(&bp->b_iowait); XB_TRACE(bp, "iowaited", (long)bp->b_error); return bp->b_error; } @@ -1799,7 +1795,7 @@ int __init xfs_buf_init(void) { #ifdef XFS_BUF_TRACE - xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); + xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); #endif xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 29d1d4adc078..fe0109956656 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -157,7 +157,7 @@ typedef struct xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ xfs_buf_bdstrat_t b_strat; /* pre-write function */ - struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ + struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; void *b_fspriv3; @@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) -#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); +#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) #define XFS_BUF_TARGET(bp) ((bp)->b_target) diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 987fe84f7b13..24fd598af846 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -139,7 +139,7 @@ xfs_nfs_get_inode( } xfs_iunlock(ip, XFS_ILOCK_SHARED); - return ip->i_vnode; + return VFS_I(ip); } STATIC struct dentry * @@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (!inode) return NULL; if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, if (!inode) return NULL; if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -219,9 +219,9 @@ xfs_fs_get_parent( if (unlikely(error)) return ERR_PTR(-error); - parent = d_alloc_anon(cip->i_vnode); + parent = d_alloc_anon(VFS_I(cip)); if (unlikely(!parent)) { - iput(cip->i_vnode); + iput(VFS_I(cip)); return ERR_PTR(-ENOMEM); } return parent; diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { const struct file_operations xfs_dir_file_operations = { .read = generic_read_dir, .readdir = xfs_file_readdir, + .llseek = generic_file_llseek, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1eefe61f0e10..36caa6d957df 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -31,7 +31,7 @@ xfs_tosspages( xfs_off_t last, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; if (mapping->nrpages) truncate_inode_pages(mapping, first); @@ -44,7 +44,7 @@ xfs_flushinval_pages( xfs_off_t last, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; int ret = 0; if (mapping->nrpages) { @@ -64,7 +64,7 @@ xfs_flush_pages( uint64_t flags, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; int ret = 0; int ret2; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index acb978d9d085..48799ba7e3e6 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq( xfs_iunlock(ip, XFS_ILOCK_SHARED); - *inode = XFS_ITOV(ip); + *inode = VFS_I(ip); return 0; } @@ -927,7 +927,7 @@ STATIC void xfs_diflags_to_linux( struct xfs_inode *ip) { - struct inode *inode = XFS_ITOV(ip); + struct inode *inode = VFS_I(ip); unsigned int xflags = xfs_ip2xflags(ip); if (xflags & XFS_XFLAG_IMMUTABLE) diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index e88f51028086..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -62,7 +62,7 @@ void xfs_synchronize_atime( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); if (inode) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; @@ -79,7 +79,7 @@ void xfs_mark_inode_dirty_sync( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); if (inode) mark_inode_dirty_sync(inode); @@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync( * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but * we do record the fact that there is dirty information in core. - * - * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG - * with XFS_ICHGTIME_ACC to be sure that access time - * update will take. Calling first with XFS_ICHGTIME_ACC - * and then XFS_ICHGTIME_MOD may fail to modify the access - * timestamp if the filesystem is mounted noacctm. */ void xfs_ichgtime( xfs_inode_t *ip, int flags) { - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); + struct inode *inode = VFS_I(ip); timespec_t tv; + int sync_it = 0; + + tv = current_fs_time(inode->i_sb); - nanotime(&tv); - if (flags & XFS_ICHGTIME_MOD) { + if ((flags & XFS_ICHGTIME_MOD) && + !timespec_equal(&inode->i_mtime, &tv)) { inode->i_mtime = tv; ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; + sync_it = 1; } - if (flags & XFS_ICHGTIME_ACC) { - inode->i_atime = tv; - ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec; - } - if (flags & XFS_ICHGTIME_CHG) { + if ((flags & XFS_ICHGTIME_CHG) && + !timespec_equal(&inode->i_ctime, &tv)) { inode->i_ctime = tv; ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; + sync_it = 1; } /* @@ -130,55 +125,11 @@ xfs_ichgtime( * ensure that the compiler does not reorder the update * of i_update_core above the timestamp updates above. */ - SYNCHRONIZE(); - ip->i_update_core = 1; - if (!(inode->i_state & I_NEW)) + if (sync_it) { + SYNCHRONIZE(); + ip->i_update_core = 1; mark_inode_dirty_sync(inode); -} - -/* - * Variant on the above which avoids querying the system clock - * in situations where we know the Linux inode timestamps have - * just been updated (and so we can update our inode cheaply). - */ -void -xfs_ichgtime_fast( - xfs_inode_t *ip, - struct inode *inode, - int flags) -{ - timespec_t *tvp; - - /* - * Atime updates for read() & friends are handled lazily now, and - * explicit updates must go through xfs_ichgtime() - */ - ASSERT((flags & XFS_ICHGTIME_ACC) == 0); - - if (flags & XFS_ICHGTIME_MOD) { - tvp = &inode->i_mtime; - ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; } - if (flags & XFS_ICHGTIME_CHG) { - tvp = &inode->i_ctime; - ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec; - } - - /* - * We update the i_update_core field _after_ changing - * the timestamps in order to coordinate properly with - * xfs_iflush() so that we don't lose timestamp updates. - * This keeps us from having to hold the inode lock - * while doing this. We use the SYNCHRONIZE macro to - * ensure that the compiler does not reorder the update - * of i_update_core above the timestamp updates above. - */ - SYNCHRONIZE(); - ip->i_update_core = 1; - if (!(inode->i_state & I_NEW)) - mark_inode_dirty_sync(inode); } /* @@ -299,7 +250,7 @@ xfs_vn_mknod( if (unlikely(error)) goto out_free_acl; - inode = ip->i_vnode; + inode = VFS_I(ip); error = xfs_init_security(inode, dir); if (unlikely(error)) @@ -366,7 +317,7 @@ xfs_vn_lookup( return NULL; } - return d_splice_alias(cip->i_vnode, dentry); + return d_splice_alias(VFS_I(cip), dentry); } STATIC struct dentry * @@ -399,12 +350,12 @@ xfs_vn_ci_lookup( /* if exact match, just splice and exit */ if (!ci_name.name) - return d_splice_alias(ip->i_vnode, dentry); + return d_splice_alias(VFS_I(ip), dentry); /* else case-insensitive match... */ dname.name = ci_name.name; dname.len = ci_name.len; - dentry = d_add_ci(ip->i_vnode, dentry, &dname); + dentry = d_add_ci(dentry, VFS_I(ip), &dname); kmem_free(ci_name.name); return dentry; } @@ -478,7 +429,7 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - inode = cip->i_vnode; + inode = VFS_I(cip); error = xfs_init_security(inode, dir); if (unlikely(error)) @@ -710,7 +661,7 @@ out_error: return error; } -const struct inode_operations xfs_inode_operations = { +static const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, .truncate = xfs_vn_truncate, .getattr = xfs_vn_getattr, @@ -722,7 +673,7 @@ const struct inode_operations xfs_inode_operations = { .fallocate = xfs_vn_fallocate, }; -const struct inode_operations xfs_dir_inode_operations = { +static const struct inode_operations xfs_dir_inode_operations = { .create = xfs_vn_create, .lookup = xfs_vn_lookup, .link = xfs_vn_link, @@ -747,7 +698,7 @@ const struct inode_operations xfs_dir_inode_operations = { .listxattr = xfs_vn_listxattr, }; -const struct inode_operations xfs_dir_ci_inode_operations = { +static const struct inode_operations xfs_dir_ci_inode_operations = { .create = xfs_vn_create, .lookup = xfs_vn_ci_lookup, .link = xfs_vn_link, @@ -772,7 +723,7 @@ const struct inode_operations xfs_dir_ci_inode_operations = { .listxattr = xfs_vn_listxattr, }; -const struct inode_operations xfs_symlink_inode_operations = { +static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, .put_link = xfs_vn_put_link, @@ -784,3 +735,98 @@ const struct inode_operations xfs_symlink_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, }; + +STATIC void +xfs_diflags_to_iflags( + struct inode *inode, + struct xfs_inode *ip) +{ + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +/* + * Initialize the Linux inode, set up the operation vectors and + * unlock the inode. + * + * When reading existing inodes from disk this is called directly + * from xfs_iget, when creating a new inode it is called from + * xfs_ialloc after setting up the inode. + */ +void +xfs_setup_inode( + struct xfs_inode *ip) +{ + struct inode *inode = ip->i_vnode; + + inode->i_mode = ip->i_d.di_mode; + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + inode->i_rdev = 0; + break; + } + + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + xfs_diflags_to_iflags(inode, ip); + xfs_iflags_clear(ip, XFS_IMODIFIED); + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &xfs_inode_operations; + inode->i_fop = &xfs_file_operations; + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + case S_IFDIR: + if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + inode->i_op = &xfs_dir_ci_inode_operations; + else + inode->i_op = &xfs_dir_inode_operations; + inode->i_fop = &xfs_dir_file_operations; + break; + case S_IFLNK: + inode->i_op = &xfs_symlink_inode_operations; + if (!(ip->i_df.if_flags & XFS_IFINLINE)) + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + default: + inode->i_op = &xfs_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; + } + + xfs_iflags_clear(ip, XFS_INEW); + barrier(); + + unlock_new_inode(inode); +} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index d97ba934a2ac..8b1a1e31dc21 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -18,10 +18,7 @@ #ifndef __XFS_IOPS_H__ #define __XFS_IOPS_H__ -extern const struct inode_operations xfs_inode_operations; -extern const struct inode_operations xfs_dir_inode_operations; -extern const struct inode_operations xfs_dir_ci_inode_operations; -extern const struct inode_operations xfs_symlink_inode_operations; +struct xfs_inode; extern const struct file_operations xfs_file_operations; extern const struct file_operations xfs_dir_file_operations; @@ -29,14 +26,6 @@ extern const struct file_operations xfs_invis_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -struct xfs_inode; -extern void xfs_ichgtime(struct xfs_inode *, int); -extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int); - -#define xfs_vtoi(vp) \ - ((struct xfs_inode *)vn_to_inode(vp)->i_private) - -#define XFS_I(inode) \ - ((struct xfs_inode *)(inode)->i_private) +extern void xfs_setup_inode(struct xfs_inode *); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 4d45d9351a6c..cc0f7b3a9795 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -45,13 +45,13 @@ #include <mrlock.h> #include <sv.h> #include <mutex.h> -#include <sema.h> #include <time.h> #include <support/ktrace.h> #include <support/debug.h> #include <support/uuid.h> +#include <linux/semaphore.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/blkdev.h> @@ -126,8 +126,6 @@ #define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) -#define current_fsuid(cred) (current->fsuid) -#define current_fsgid(cred) (current->fsgid) #define current_test_flags(f) (current->flags & (f)) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) @@ -180,7 +178,7 @@ #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() #define xfs_itruncate_data(ip, off) \ - (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) + (-vmtruncate(VFS_I(ip), (off))) /* Move the kernel do_div definition off to one side */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 82333b3e118e..1957e5357d04 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -137,7 +137,7 @@ xfs_iozero( struct address_space *mapping; int status; - mapping = ip->i_vnode->i_mapping; + mapping = VFS_I(ip)->i_mapping; do { unsigned offset, bytes; void *fsdata; @@ -674,9 +674,7 @@ start: */ if (likely(!(ioflags & IO_INVIS) && !mnt_want_write(file->f_path.mnt))) { - file_update_time(file); - xfs_ichgtime_fast(xip, inode, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); mnt_drop_write(file->f_path.mnt); } diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 30ae96397e31..73c65f19e549 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -581,118 +581,6 @@ xfs_max_file_offset( return (((__uint64_t)pagefactor) << bitshift) - 1; } -STATIC_INLINE void -xfs_set_inodeops( - struct inode *inode) -{ - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - inode->i_op = &xfs_inode_operations; - inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - case S_IFDIR: - if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) - inode->i_op = &xfs_dir_ci_inode_operations; - else - inode->i_op = &xfs_dir_inode_operations; - inode->i_fop = &xfs_dir_file_operations; - break; - case S_IFLNK: - inode->i_op = &xfs_symlink_inode_operations; - if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE)) - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - default: - inode->i_op = &xfs_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - break; - } -} - -STATIC_INLINE void -xfs_revalidate_inode( - xfs_mount_t *mp, - bhv_vnode_t *vp, - xfs_inode_t *ip) -{ - struct inode *inode = vn_to_inode(vp); - - inode->i_mode = ip->i_d.di_mode; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = - MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - inode->i_rdev = 0; - break; - } - - inode->i_generation = ip->i_d.di_gen; - i_size_write(inode, ip->i_d.di_size); - inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; - inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; - inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; - inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; - inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; - xfs_iflags_clear(ip, XFS_IMODIFIED); -} - -void -xfs_initialize_vnode( - struct xfs_mount *mp, - bhv_vnode_t *vp, - struct xfs_inode *ip) -{ - struct inode *inode = vn_to_inode(vp); - - if (!ip->i_vnode) { - ip->i_vnode = vp; - inode->i_private = ip; - } - - /* - * We need to set the ops vectors, and unlock the inode, but if - * we have been called during the new inode create process, it is - * too early to fill in the Linux inode. We will get called a - * second time once the inode is properly set up, and then we can - * finish our work. - */ - if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) { - xfs_revalidate_inode(mp, vp, ip); - xfs_set_inodeops(inode); - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); - } -} - int xfs_blkdev_get( xfs_mount_t *mp, @@ -982,26 +870,21 @@ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - bhv_vnode_t *vp; - - vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); - if (unlikely(!vp)) - return NULL; - return vn_to_inode(vp); + return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); } STATIC void xfs_fs_destroy_inode( struct inode *inode) { - kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); + kmem_zone_free(xfs_vnode_zone, inode); } STATIC void xfs_fs_inode_init_once( void *vnode) { - inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); + inode_init_once((struct inode *)vnode); } /* @@ -1106,7 +989,7 @@ void xfs_flush_inode( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); @@ -1131,7 +1014,7 @@ void xfs_flush_device( xfs_inode_t *ip) { - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); + struct inode *inode = VFS_I(ip); igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); @@ -1201,6 +1084,15 @@ xfssyncd( } STATIC void +xfs_free_fsname( + struct xfs_mount *mp) +{ + kfree(mp->m_fsname); + kfree(mp->m_rtname); + kfree(mp->m_logname); +} + +STATIC void xfs_fs_put_super( struct super_block *sb) { @@ -1239,8 +1131,6 @@ xfs_fs_put_super( error = xfs_unmount_flush(mp, 0); WARN_ON(error); - IRELE(rip); - /* * If we're forcing a shutdown, typically because of a media error, * we want to make sure we invalidate dirty pages that belong to @@ -1257,10 +1147,12 @@ xfs_fs_put_super( } xfs_unmountfs(mp); + xfs_freesb(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); xfs_qmops_put(mp); xfs_dmops_put(mp); + xfs_free_fsname(mp); kfree(mp); } @@ -1517,6 +1409,8 @@ xfs_start_flags( struct xfs_mount_args *ap, struct xfs_mount *mp) { + int error; + /* Values are in BBs */ if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { /* @@ -1549,17 +1443,27 @@ xfs_start_flags( ap->logbufsize); return XFS_ERROR(EINVAL); } + + error = ENOMEM; + mp->m_logbsize = ap->logbufsize; mp->m_fsname_len = strlen(ap->fsname) + 1; - mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); - strcpy(mp->m_fsname, ap->fsname); + + mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); + if (!mp->m_fsname) + goto out; + if (ap->rtname[0]) { - mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP); - strcpy(mp->m_rtname, ap->rtname); + mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); + if (!mp->m_rtname) + goto out_free_fsname; + } + if (ap->logname[0]) { - mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP); - strcpy(mp->m_logname, ap->logname); + mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); + if (!mp->m_logname) + goto out_free_rtname; } if (ap->flags & XFSMNT_WSYNC) @@ -1632,6 +1536,14 @@ xfs_start_flags( if (ap->flags & XFSMNT_DMAPI) mp->m_flags |= XFS_MOUNT_DMAPI; return 0; + + + out_free_rtname: + kfree(mp->m_rtname); + out_free_fsname: + kfree(mp->m_fsname); + out: + return error; } /* @@ -1792,10 +1704,10 @@ xfs_fs_fill_super( */ error = xfs_start_flags(args, mp); if (error) - goto out_destroy_counters; + goto out_free_fsname; error = xfs_readsb(mp, flags); if (error) - goto out_destroy_counters; + goto out_free_fsname; error = xfs_finish_flags(args, mp); if (error) goto out_free_sb; @@ -1811,7 +1723,7 @@ xfs_fs_fill_super( if (error) goto out_free_sb; - error = xfs_mountfs(mp, flags); + error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; @@ -1825,7 +1737,7 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - root = igrab(mp->m_rootip->i_vnode); + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; goto fail_unmount; @@ -1857,7 +1769,8 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); - out_destroy_counters: + out_free_fsname: + xfs_free_fsname(mp); xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); out_put_qmops: @@ -1890,10 +1803,8 @@ xfs_fs_fill_super( error = xfs_unmount_flush(mp, 0); WARN_ON(error); - IRELE(mp->m_rootip); - xfs_unmountfs(mp); - goto out_destroy_counters; + goto out_free_sb; } STATIC int @@ -2014,7 +1925,7 @@ xfs_free_trace_bufs(void) STATIC int __init xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", + xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, xfs_fs_inode_init_once); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index b7d13da01bd6..fe2ef4e6a0f9 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -101,9 +101,6 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp, - struct xfs_inode *ip); - extern void xfs_flush_inode(struct xfs_inode *); extern void xfs_flush_device(struct xfs_inode *); diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index 25488b6d9881..b52528bbbfff 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -33,7 +33,7 @@ /* - * Dedicated vnode inactive/reclaim sync semaphores. + * Dedicated vnode inactive/reclaim sync wait queues. * Prime number of hash buckets since address is used as the key. */ #define NVSYNC 37 @@ -82,24 +82,6 @@ vn_ioerror( xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); } - -/* - * Add a reference to a referenced vnode. - */ -bhv_vnode_t * -vn_hold( - bhv_vnode_t *vp) -{ - struct inode *inode; - - XFS_STATS_INC(vn_hold); - - inode = igrab(vn_to_inode(vp)); - ASSERT(inode); - - return vp; -} - #ifdef XFS_INODE_TRACE /* @@ -108,7 +90,7 @@ vn_hold( */ static inline int xfs_icount(struct xfs_inode *ip) { - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *vp = VFS_I(ip); if (vp) return vn_count(vp); diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 41ca2cec5d31..683ce16210ff 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -22,20 +22,6 @@ struct file; struct xfs_iomap; struct attrlist_cursor_kern; -typedef struct inode bhv_vnode_t; - -/* - * Vnode to Linux inode mapping. - */ -static inline bhv_vnode_t *vn_from_inode(struct inode *inode) -{ - return inode; -} -static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) -{ - return vnode; -} - /* * Return values for xfs_inactive. A return value of * VN_INACTIVE_NOCACHE implies that the file system behavior @@ -76,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); -static inline int vn_count(bhv_vnode_t *vp) +static inline int vn_count(struct inode *vp) { - return atomic_read(&vn_to_inode(vp)->i_count); + return atomic_read(&vp->i_count); } -/* - * Vnode reference counting functions (and macros for compatibility). - */ -extern bhv_vnode_t *vn_hold(bhv_vnode_t *); +#define IHOLD(ip) \ +do { \ + ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ + atomic_inc(&(VFS_I(ip)->i_count)); \ + xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ +} while (0) -#if defined(XFS_INODE_TRACE) -#define VN_HOLD(vp) \ - ((void)vn_hold(vp), \ - xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) -#define VN_RELE(vp) \ - (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ - iput(vn_to_inode(vp))) -#else -#define VN_HOLD(vp) ((void)vn_hold(vp)) -#define VN_RELE(vp) (iput(vn_to_inode(vp))) -#endif +#define IRELE(ip) \ +do { \ + xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + iput(VFS_I(ip)); \ +} while (0) -static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) +static inline struct inode *vn_grab(struct inode *vp) { - struct inode *inode = igrab(vn_to_inode(vp)); - return inode ? vn_from_inode(inode) : NULL; + return igrab(vp); } /* * Dealing with bad inodes */ -static inline int VN_BAD(bhv_vnode_t *vp) +static inline int VN_BAD(struct inode *vp) { - return is_bad_inode(vn_to_inode(vp)); + return is_bad_inode(vp); } /* * Extracting atime values in various formats */ -static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) +static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime) { bs_atime->tv_sec = vp->i_atime.tv_sec; bs_atime->tv_nsec = vp->i_atime.tv_nsec; } -static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) +static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts) { *ts = vp->i_atime; } -static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) +static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) { *tt = vp->i_atime.tv_sec; } @@ -134,9 +115,9 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) /* * Some useful predicates. */ -#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) -#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ +#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) +#define VN_CACHED(vp) (vp->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ PAGECACHE_TAG_DIRTY) diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index fc9f3fb39b7b..f2705f2fd43c 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,11 +101,18 @@ xfs_qm_dqinit( if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; mutex_init(&dqp->q_qlock); - initnsema(&dqp->q_flock, 1, "fdq"); sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + #ifdef XFS_DQUOT_TRACE - dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP); + dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS); xfs_dqtrace_entry(dqp, "DQINIT"); #endif } else { @@ -150,7 +157,6 @@ xfs_qm_dqdestroy( ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); mutex_destroy(&dqp->q_qlock); - freesema(&dqp->q_flock); sv_destroy(&dqp->q_pinwait); #ifdef XFS_DQUOT_TRACE @@ -431,7 +437,7 @@ xfs_qm_dqalloc( * when it unlocks the inode. Since we want to keep the quota * inode around, we bump the vnode ref count now. */ - VN_HOLD(XFS_ITOV(quotip)); + IHOLD(quotip); xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; @@ -1211,7 +1217,7 @@ xfs_qm_dqflush( int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); xfs_dqtrace_entry(dqp, "DQFLUSH"); /* @@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done( xfs_dqfunlock(dqp); } - -int -xfs_qm_dqflock_nowait( - xfs_dquot_t *dqp) -{ - int locked; - - locked = cpsema(&((dqp)->q_flock)); - - /* XXX ifdef these out */ - if (locked) - (dqp)->dq_flags |= XFS_DQ_FLOCKED; - return (locked); -} - - int xfs_qm_dqlock_nowait( xfs_dquot_t *dqp) { - return (mutex_trylock(&((dqp)->q_qlock))); + return mutex_trylock(&dqp->q_qlock); } void xfs_dqlock( xfs_dquot_t *dqp) { - mutex_lock(&(dqp->q_qlock)); + mutex_lock(&dqp->q_qlock); } void @@ -1468,7 +1458,7 @@ xfs_qm_dqpurge( * if we're turning off quotas. Basically, we need this flush * lock, and are willing to block on it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * Block on the flush lock after nudging dquot buffer, * if it is incore. diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index f7393bba4e95..8958d0faf8d3 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -82,7 +82,7 @@ typedef struct xfs_dquot { xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ mutex_t q_qlock; /* quota lock */ - sema_t q_flock; /* flush lock */ + struct completion q_flush; /* flush completion queue */ uint q_pincount; /* pin count for this dquot */ sv_t q_pinwait; /* sync var for pinning */ #ifdef XFS_DQUOT_TRACE @@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) /* - * The following three routines simply manage the q_flock - * semaphore embedded in the dquot. This semaphore synchronizes - * processes attempting to flush the in-core dquot back to disk. + * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. */ -#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ - (dqp)->dq_flags |= XFS_DQ_FLOCKED; } -#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ - vsema(&((dqp)->q_flock)); \ - (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } +static inline void xfs_dqflock(xfs_dquot_t *dqp) +{ + wait_for_completion(&dqp->q_flush); +} + +static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +{ + return try_wait_for_completion(&dqp->q_flush); +} + +static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +{ + complete(&dqp->q_flush); +} -#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) @@ -167,7 +175,6 @@ extern int xfs_qm_dqflush(xfs_dquot_t *, uint); extern int xfs_qm_dqpurge(xfs_dquot_t *); extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); -extern int xfs_qm_dqflock_nowait(xfs_dquot_t *); extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, xfs_disk_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 08d2fc89e6a1..f028644caa5e 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push( dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); /* * Since we were able to lock the dquot's flush lock and @@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if (!issemalocked(&(dqp->q_flock)) || + if (completion_done(&dqp->q_flush) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); @@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf( if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - issemalocked(&(dqp->q_flock))); + !completion_done(&dqp->q_flush)); qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); @@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock( return (XFS_ITEM_LOCKED); retval = XFS_ITEM_SUCCESS; - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * The dquot is already being flushed. It may have been * flushed delayed write, however, and we don't want to diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 021934a3d456..df0ffef9775a 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy( */ void xfs_qm_mount_quotas( - xfs_mount_t *mp, - int mfsi_flags) + xfs_mount_t *mp) { int error = 0; uint sbf; @@ -346,8 +345,7 @@ xfs_qm_mount_quotas( /* * If any of the quotas are not consistent, do a quotacheck. */ - if (XFS_QM_NEED_QUOTACHECK(mp) && - !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { + if (XFS_QM_NEED_QUOTACHECK(mp)) { error = xfs_qm_quotacheck(mp); if (error) { /* Quotacheck failed and disabled quotas. */ @@ -484,7 +482,7 @@ again: xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * If we can't grab the flush lock then check * to see if the dquot has been flushed delayed @@ -1062,7 +1060,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { if (nowait) { xfs_dqunlock(dqp); continue; @@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist( * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); dqp = dqp->dq_flnext; continue; @@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void) * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); continue; } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index cd2300e374af..44f25349e478 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern void xfs_qm_mount_quotas(xfs_mount_t *, int); +extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); extern int xfs_qm_unmount_quotas(xfs_mount_t *); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index f4f6c4c861d7..eea2e60b456b 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -162,7 +162,7 @@ xfs_qm_newmount( * mounting, and get on with the boring life * without disk quotas. */ - xfs_qm_mount_quotas(mp, 0); + xfs_qm_mount_quotas(mp); } else { /* * Clear the quota flags, but remember them. This @@ -184,13 +184,12 @@ STATIC int xfs_qm_endmount( xfs_mount_t *mp, uint needquotamount, - uint quotaflags, - int mfsi_flags) + uint quotaflags) { if (needquotamount) { ASSERT(mp->m_qflags == 0); mp->m_qflags = quotaflags; - xfs_qm_mount_quotas(mp, mfsi_flags); + xfs_qm_mount_quotas(mp); } #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index adfb8723f65a..1a3b803dfa55 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes( { xfs_inode_t *ip, *topino; uint ireclaims; - bhv_vnode_t *vp; + struct inode *vp; boolean_t vnode_refd; ASSERT(mp->m_quotainfo); @@ -1059,7 +1059,7 @@ again: ip = ip->i_mnext; continue; } - vp = XFS_ITOV_NULL(ip); + vp = VFS_I(ip); if (!vp) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 3e4648ad9cfc..b2f639a1416f 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -37,15 +37,15 @@ #include <linux/capability.h> #include <linux/posix_acl_xattr.h> -STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); +STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); STATIC void xfs_acl_get_endian(xfs_acl_t *); STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); STATIC int xfs_acl_invalid(xfs_acl_t *); STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); -STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); -STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); +STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(struct inode *, int); kmem_zone_t *xfs_acl_zone; @@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone; */ int xfs_acl_vhasacl_access( - bhv_vnode_t *vp) + struct inode *vp) { int error; @@ -68,7 +68,7 @@ xfs_acl_vhasacl_access( */ int xfs_acl_vhasacl_default( - bhv_vnode_t *vp) + struct inode *vp) { int error; @@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr( int xfs_acl_vget( - bhv_vnode_t *vp, + struct inode *vp, void *acl, size_t size, int kind) @@ -217,7 +217,6 @@ xfs_acl_vget( posix_acl_xattr_header *ext_acl = acl; int flags = 0; - VN_HOLD(vp); if(size) { if (!(_ACL_ALLOC(xfs_acl))) { error = ENOMEM; @@ -239,11 +238,10 @@ xfs_acl_vget( goto out; } if (kind == _ACL_TYPE_ACCESS) - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); } out: - VN_RELE(vp); if(xfs_acl) _ACL_FREE(xfs_acl); return -error; @@ -251,28 +249,26 @@ out: int xfs_acl_vremove( - bhv_vnode_t *vp, + struct inode *vp, int kind) { int error; - VN_HOLD(vp); error = xfs_acl_allow_set(vp, kind); if (!error) { - error = xfs_attr_remove(xfs_vtoi(vp), + error = xfs_attr_remove(XFS_I(vp), kind == _ACL_TYPE_DEFAULT? SGI_ACL_DEFAULT: SGI_ACL_FILE, ATTR_ROOT); if (error == ENOATTR) error = 0; /* 'scool */ } - VN_RELE(vp); return -error; } int xfs_acl_vset( - bhv_vnode_t *vp, + struct inode *vp, void *acl, size_t size, int kind) @@ -298,7 +294,6 @@ xfs_acl_vset( return 0; } - VN_HOLD(vp); error = xfs_acl_allow_set(vp, kind); /* Incoming ACL exists, set file mode based on its value */ @@ -321,7 +316,6 @@ xfs_acl_vset( } out: - VN_RELE(vp); _ACL_FREE(xfs_acl); return -error; } @@ -363,7 +357,7 @@ xfs_acl_iaccess( STATIC int xfs_acl_allow_set( - bhv_vnode_t *vp, + struct inode *vp, int kind) { if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) @@ -372,7 +366,7 @@ xfs_acl_allow_set( return ENOTDIR; if (vp->i_sb->s_flags & MS_RDONLY) return EROFS; - if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) + if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) return EPERM; return 0; } @@ -566,7 +560,7 @@ xfs_acl_get_endian( */ STATIC void xfs_acl_get_attr( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *aclp, int kind, int flags, @@ -576,7 +570,7 @@ xfs_acl_get_attr( ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); flags |= ATTR_ROOT; - *error = xfs_attr_get(xfs_vtoi(vp), + *error = xfs_attr_get(XFS_I(vp), kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, (char *)aclp, &len, flags); @@ -590,7 +584,7 @@ xfs_acl_get_attr( */ STATIC void xfs_acl_set_attr( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *aclp, int kind, int *error) @@ -615,7 +609,7 @@ xfs_acl_set_attr( INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); } INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - *error = xfs_attr_set(xfs_vtoi(vp), + *error = xfs_attr_set(XFS_I(vp), kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT, (char *)newacl, len, ATTR_ROOT); @@ -624,7 +618,7 @@ xfs_acl_set_attr( int xfs_acl_vtoacl( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *access_acl, xfs_acl_t *default_acl) { @@ -639,7 +633,7 @@ xfs_acl_vtoacl( if (error) access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; else /* We have a good ACL and the file mode, synchronize. */ - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); } if (default_acl) { @@ -656,7 +650,7 @@ xfs_acl_vtoacl( */ int xfs_acl_inherit( - bhv_vnode_t *vp, + struct inode *vp, mode_t mode, xfs_acl_t *pdaclp) { @@ -715,7 +709,7 @@ out_error: */ STATIC int xfs_acl_setmode( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *acl, int *basicperms) { @@ -734,7 +728,7 @@ xfs_acl_setmode( * mode. The m:: bits take precedence over the g:: bits. */ iattr.ia_valid = ATTR_MODE; - iattr.ia_mode = xfs_vtoi(vp)->i_d.di_mode; + iattr.ia_mode = XFS_I(vp)->i_d.di_mode; iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); ap = acl->acl_entry; for (i = 0; i < acl->acl_cnt; ++i) { @@ -764,7 +758,7 @@ xfs_acl_setmode( if (gap && nomask) iattr.ia_mode |= gap->ae_perm << 3; - return xfs_setattr(xfs_vtoi(vp), &iattr, 0, sys_cred); + return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 323ee94cf831..a4e293b93efa 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -59,14 +59,14 @@ extern struct kmem_zone *xfs_acl_zone; (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) -extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *); +extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); -extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *); -extern int xfs_acl_vhasacl_access(bhv_vnode_t *); -extern int xfs_acl_vhasacl_default(bhv_vnode_t *); -extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int); -extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); -extern int xfs_acl_vremove(bhv_vnode_t *, int); +extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); +extern int xfs_acl_vhasacl_access(struct inode *); +extern int xfs_acl_vhasacl_default(struct inode *); +extern int xfs_acl_vset(struct inode *, void *, size_t, int); +extern int xfs_acl_vget(struct inode *, void *, size_t, int); +extern int xfs_acl_vremove(struct inode *, int); #define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index f9472a2076d4..0b3b5efe848c 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -92,16 +92,6 @@ ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } -/* define generic INT_ macros */ - -#define INT_GET(reference,arch) \ - (((arch) == ARCH_NOCONVERT) \ - ? \ - (reference) \ - : \ - INT_SWAP((reference),(reference)) \ - ) - /* does not return a value */ #define INT_SET(reference,arch,valueref) \ (__builtin_constant_p(valueref) ? \ @@ -112,64 +102,6 @@ ) \ ) -/* does not return a value */ -#define INT_MOD_EXPR(reference,arch,code) \ - (((arch) == ARCH_NOCONVERT) \ - ? \ - (void)((reference) code) \ - : \ - (void)( \ - (reference) = INT_GET((reference),arch) , \ - ((reference) code), \ - INT_SET(reference, arch, reference) \ - ) \ - ) - -/* does not return a value */ -#define INT_MOD(reference,arch,delta) \ - (void)( \ - INT_MOD_EXPR(reference,arch,+=(delta)) \ - ) - -/* - * INT_COPY - copy a value between two locations with the - * _same architecture_ but _potentially different sizes_ - * - * if the types of the two parameters are equal or they are - * in native architecture, a simple copy is done - * - * otherwise, architecture conversions are done - * - */ - -/* does not return a value */ -#define INT_COPY(dst,src,arch) \ - ( \ - ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \ - ? \ - (void)((dst) = (src)) \ - : \ - INT_SET(dst, arch, INT_GET(src, arch)) \ - ) - -/* - * INT_XLATE - copy a value in either direction between two locations - * with different architectures - * - * dir < 0 - copy from memory to buffer (native to arch) - * dir > 0 - copy from buffer to memory (arch to native) - */ - -/* does not return a value */ -#define INT_XLATE(buf,mem,dir,arch) {\ - ASSERT(dir); \ - if (dir>0) { \ - (mem)=INT_GET(buf, arch); \ - } else { \ - INT_SET(buf, arch, mem); \ - } \ -} - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index 78de80e3caa2..f7cdc28aff41 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -194,6 +194,46 @@ xfs_attr_get( return(error); } +/* + * Calculate how many blocks we need for the new attribute, + */ +int +xfs_attr_calc_size( + struct xfs_inode *ip, + int namelen, + int valuelen, + int *local) +{ + struct xfs_mount *mp = ip->i_mount; + int size; + int nblks; + + /* + * Determine space new attribute will use, and if it would be + * "local" or "remote" (note: local != inline). + */ + size = xfs_attr_leaf_newentsize(namelen, valuelen, + mp->m_sb.sb_blocksize, local); + + nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); + if (*local) { + if (size > (mp->m_sb.sb_blocksize >> 1)) { + /* Double split possible */ + nblks *= 2; + } + } else { + /* + * Out of line attribute, cannot double split, but + * make room for the attribute value itself. + */ + uint dblocks = XFS_B_TO_FSB(mp, valuelen); + nblks += dblocks; + nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); + } + + return nblks; +} + STATIC int xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, char *value, int valuelen, int flags) @@ -202,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, xfs_fsblock_t firstblock; xfs_bmap_free_t flist; int error, err2, committed; - int local, size; - uint nblks; xfs_mount_t *mp = dp->i_mount; int rsvd = (flags & ATTR_ROOT) != 0; + int local; /* * Attach the dquots to the inode. @@ -241,30 +280,8 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, args.whichfork = XFS_ATTR_FORK; args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - /* - * Determine space new attribute will use, and if it would be - * "local" or "remote" (note: local != inline). - */ - size = xfs_attr_leaf_newentsize(name->len, valuelen, - mp->m_sb.sb_blocksize, &local); - - nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); - if (local) { - if (size > (mp->m_sb.sb_blocksize >> 1)) { - /* Double split possible */ - nblks <<= 1; - } - } else { - uint dblocks = XFS_B_TO_FSB(mp, valuelen); - /* Out of line attribute, cannot double split, but make - * room for the attribute value itself. - */ - nblks += dblocks; - nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); - } - /* Size is now blocks for attribute data */ - args.total = nblks; + args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); /* * Start our first transaction of the day. @@ -286,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; - if ((error = xfs_trans_reserve(args.trans, (uint) nblks, - XFS_ATTRSET_LOG_RES(mp, nblks), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_ATTRSET_LOG_COUNT))) { + if ((error = xfs_trans_reserve(args.trans, args.total, + XFS_ATTRSET_LOG_RES(mp, args.total), 0, + XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { xfs_trans_cancel(args.trans, 0); return(error); } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, - rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, + rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : + XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); @@ -384,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf. */ - if ((error = xfs_attr_rolltrans(&args.trans, dp))) + + error = xfs_trans_roll(&args.trans, dp); + if (error) goto out; } @@ -964,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Commit the current trans (including the inode) and start * a new one. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); /* @@ -978,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Commit the transaction that added the attr name so that * later routines can manage their own transactions. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); /* @@ -1067,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) /* * Commit the remove and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, dp); + error = xfs_trans_roll(&args->trans, dp); } else if (args->rmtblkno > 0) { /* @@ -1298,7 +1318,8 @@ restart: * Commit the node conversion and start the next * trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; goto restart; @@ -1349,7 +1370,8 @@ restart: * Commit the leaf addition or btree split and start the next * trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; /* @@ -1449,7 +1471,8 @@ restart: /* * Commit and start the next trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; } else if (args->rmtblkno > 0) { @@ -1581,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) /* * Commit the Btree join operation and start a new trans. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; } @@ -2082,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) /* * Start the next trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); } @@ -2232,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * Close out trans and start the next one in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) + error = xfs_trans_roll(&args->trans, args->dp); + if (error) return (error); } return(0); diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 8b2d31c19e4d..fb3b2a68b9b9 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -129,6 +129,7 @@ typedef struct xfs_attr_list_context { /* * Overall external interface routines. */ +int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); int xfs_attr_inactive(struct xfs_inode *dp); int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); int xfs_attr_rmtval_get(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 23ef5d7c87e1..79da6b2ea99e 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -2498,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); - - return(error); + return xfs_trans_roll(&args->trans, args->dp); } /* @@ -2547,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); - - return(error); + return xfs_trans_roll(&args->trans, args->dp); } /* @@ -2665,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); + error = xfs_trans_roll(&args->trans, args->dp); return(error); } @@ -2723,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) /* * Commit the invalidate and start the next transaction. */ - error = xfs_attr_rolltrans(trans, dp); + error = xfs_trans_roll(trans, dp); return (error); } @@ -2825,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, /* * Atomically commit the whole invalidate stuff. */ - if ((error = xfs_attr_rolltrans(trans, dp))) + error = xfs_trans_roll(trans, dp); + if (error) return (error); } @@ -2964,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, /* * Roll to next transaction. */ - if ((error = xfs_attr_rolltrans(trans, dp))) + error = xfs_trans_roll(trans, dp); + if (error) return (error); } @@ -2974,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, return(0); } - - -/* - * Roll from one trans in the sequence of PERMANENT transactions to the next. - */ -int -xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp) -{ - xfs_trans_t *trans; - unsigned int logres, count; - int error; - - /* - * Ensure that the inode is always logged. - */ - trans = *transp; - xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); - - /* - * Copy the critical parameters from one trans to the next. - */ - logres = trans->t_log_res; - count = trans->t_log_count; - *transp = xfs_trans_dup(trans); - - /* - * Commit the current transaction. - * If this commit failed, then it'd just unlock those items that - * are not marked ihold. That also means that a filesystem shutdown - * is in progress. The caller takes the responsibility to cancel - * the duplicate transaction that gets returned. - */ - if ((error = xfs_trans_commit(trans, 0))) - return (error); - - trans = *transp; - - /* - * Reserve space in the log for th next transaction. - * This also pushes items in the "AIL", the list of logged items, - * out to disk if they are taking up space at the tail of the log - * that we want to use. This requires that either nothing be locked - * across this call, or that anything that is locked be logged in - * the prior and the next transactions. - */ - error = xfs_trans_reserve(trans, 0, logres, 0, - XFS_TRANS_PERM_LOG_RES, count); - /* - * Ensure that the inode is in the new transaction and locked. - */ - if (!error) { - xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(trans, dp); - } - return (error); - -} diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 5ecf437b7825..83e9af417ca2 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -274,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, struct xfs_dabuf *leaf2_bp); int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local); -int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); - #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41b..48228848f5ae 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -25,109 +25,6 @@ * XFS bit manipulation routines, used in non-realtime code. */ -#ifndef HAVE_ARCH_HIGHBIT -/* - * Index of high bit number in byte, -1 for none set, 0..7 otherwise. - */ -static const char xfs_highbit[256] = { - -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ - 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ -}; -#endif - -/* - * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. - */ -inline int -xfs_highbit32( - __uint32_t v) -{ -#ifdef HAVE_ARCH_HIGHBIT - return highbit32(v); -#else - int i; - - if (v & 0xffff0000) - if (v & 0xff000000) - i = 24; - else - i = 16; - else if (v & 0x0000ffff) - if (v & 0x0000ff00) - i = 8; - else - i = 0; - else - return -1; - return i + xfs_highbit[(v >> i) & 0xff]; -#endif -} - -/* - * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_lowbit64( - __uint64_t v) -{ - __uint32_t w = (__uint32_t)v; - int n = 0; - - if (w) { /* lower bits */ - n = ffs(w); - } else { /* upper bits */ - w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; - } - return n - 1; -} - -/* - * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_highbit64( - __uint64_t v) -{ - __uint32_t h = (__uint32_t)(v >> 32); - - if (h) - return xfs_highbit32(h) + 32; - return xfs_highbit32((__uint32_t)v); -} - - /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782c..8e0e463dae2d 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n) } /* Get high bit set out of 32-bit argument, -1 if none set */ -extern int xfs_highbit32(__uint32_t v); +static inline int xfs_highbit32(__uint32_t v) +{ + return fls(v) - 1; +} + +/* Get high bit set out of 64-bit argument, -1 if none set */ +static inline int xfs_highbit64(__uint64_t v) +{ + return fls64(v) - 1; +} + +/* Get low bit set out of 32-bit argument, -1 if none set */ +static inline int xfs_lowbit32(__uint32_t v) +{ + unsigned long t = v; + return (v) ? find_first_bit(&t, 32) : -1; +} /* Get low bit set out of 64-bit argument, -1 if none set */ -extern int xfs_lowbit64(__uint64_t v); +static inline int xfs_lowbit64(__uint64_t v) +{ + __uint32_t w = (__uint32_t)v; + int n = 0; -/* Get high bit set out of 64-bit argument, -1 if none set */ -extern int xfs_highbit64(__uint64_t); + if (w) { /* lower bits */ + n = ffs(w); + } else { /* upper bits */ + w = (__uint32_t)(v >> 32); + if (w && (n = ffs(w))) + n += 32; + } + return n - 1; +} /* Return whether bitmap is empty (1 == empty) */ extern int xfs_bitmap_empty(uint *map, uint size); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 3c4beb3a4326..a1aab9275d5a 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -384,14 +384,14 @@ xfs_bmap_count_tree( int levelin, int *count); -STATIC int +STATIC void xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, int numrecs, int *count); -STATIC int +STATIC void xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, @@ -4000,7 +4000,7 @@ xfs_bmap_add_attrfork( ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } ASSERT(ip->i_d.di_anextents == 0); - VN_HOLD(XFS_ITOV(ip)); + IHOLD(ip); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -6096,7 +6096,7 @@ xfs_bmap_get_bp( tp = cur->bc_tp; licp = &tp->t_items; while (!bp && licp != NULL) { - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { licp = licp->lic_next; continue; } @@ -6106,11 +6106,11 @@ xfs_bmap_get_bp( xfs_buf_log_item_t *bip; xfs_buf_t *lbp; - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); lip = lidp->lid_item; if (lip->li_type != XFS_LI_BUF) continue; @@ -6367,13 +6367,9 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - if (unlikely(xfs_bmap_count_leaves(ifp, 0, + xfs_bmap_count_leaves(ifp, 0, ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count) < 0)) { - XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } + count); return 0; } @@ -6454,13 +6450,7 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - if (unlikely(xfs_bmap_disk_count_leaves(0, - block, numrecs, count) < 0)) { - xfs_trans_brelse(tp, bp); - XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } + xfs_bmap_disk_count_leaves(0, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) break; @@ -6478,7 +6468,7 @@ xfs_bmap_count_tree( /* * Count leaf blocks given a range of extent records. */ -STATIC int +STATIC void xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, @@ -6491,14 +6481,13 @@ xfs_bmap_count_leaves( xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); *count += xfs_bmbt_get_blockcount(frp); } - return 0; } /* * Count leaf blocks given a range of extent records originally * in btree format. */ -STATIC int +STATIC void xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, @@ -6512,5 +6501,4 @@ xfs_bmap_disk_count_leaves( frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); *count += xfs_bmbt_disk_get_blockcount(frp); } - return 0; } diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index aeb87ca69fcc..cc593a84c345 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone; /* * Btree magic numbers. */ -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = -{ +const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }; /* - * Prototypes for internal routines. - */ - -/* - * Checking routine: return maxrecs for the block. - */ -STATIC int /* number of records fitting in block */ -xfs_btree_maxrecs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block);/* generic btree block pointer */ - -/* - * Internal routines. - */ - -/* - * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. - */ -STATIC xfs_btree_block_t * /* generic btree block pointer */ -xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree */ - struct xfs_buf **bpp); /* buffer containing the block */ - -/* * Checking routine: return maxrecs for the block. */ STATIC int /* number of records fitting in block */ @@ -457,35 +430,6 @@ xfs_btree_dup_cursor( } /* - * Change the cursor to point to the first record at the given level. - * Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to change */ -{ - xfs_btree_block_t *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - /* - * Get the block pointer for this level. - */ - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - /* - * It's empty, there is no such record. - */ - if (!block->bb_h.bb_numrecs) - return 0; - /* - * Set the ptr value to 1, that's the first record/key. - */ - cur->bc_ptrs[level] = 1; - return 1; -} - -/* * Retrieve the block pointer from the cursor at the given level. * This may be a bmap btree root or from a buffer. */ @@ -626,6 +570,13 @@ xfs_btree_init_cursor( cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; break; + case XFS_BTNUM_INO: + /* + * Inode allocation btree fields. + */ + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + break; case XFS_BTNUM_BMAP: /* * Bmap btree fields. @@ -638,13 +589,6 @@ xfs_btree_init_cursor( cur->bc_private.b.flags = 0; cur->bc_private.b.whichfork = whichfork; break; - case XFS_BTNUM_INO: - /* - * Inode allocation btree fields. - */ - cur->bc_private.i.agbp = agbp; - cur->bc_private.i.agno = agno; - break; default: ASSERT(0); } @@ -671,6 +615,35 @@ xfs_btree_islastblock( } /* + * Change the cursor to point to the first record at the given level. + * Other levels are unaffected. + */ +int /* success=1, failure=0 */ +xfs_btree_firstrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + xfs_btree_block_t *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (!block->bb_h.bb_numrecs) + return 0; + /* + * Set the ptr value to 1, that's the first record/key. + */ + cur->bc_ptrs[level] = 1; + return 1; +} + +/* * Change the cursor to point to the last record in the current block * at the given level. Other levels are unaffected. */ @@ -890,12 +863,12 @@ xfs_btree_readahead_core( case XFS_BTNUM_INO: i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(i->bb_leftsib), 1); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(i->bb_rightsib), 1); rval++; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7440b78f9cec..1f528a2a3754 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -158,8 +158,8 @@ typedef struct xfs_btree_cur __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ xfs_btnum_t bc_btnum; /* identifies which btree type */ union { - struct { /* needed for BNO, CNT */ - struct xfs_buf *agbp; /* agf buffer pointer */ + struct { /* needed for BNO, CNT, INO */ + struct xfs_buf *agbp; /* agf/agi buffer pointer */ xfs_agnumber_t agno; /* ag number */ } a; struct { /* needed for BMAP */ @@ -172,10 +172,6 @@ typedef struct xfs_btree_cur char flags; /* flags */ #define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ } b; - struct { /* needed for INO */ - struct xfs_buf *agbp; /* agi buffer pointer */ - xfs_agnumber_t agno; /* ag number */ - } i; } bc_private; /* per-btree type data */ } xfs_btree_cur_t; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index d86ca2c03a70..608c30c3f76b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -737,7 +737,7 @@ xfs_buf_item_init( bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); bip->bli_format.blf_map_size = map_size; #ifdef XFS_BLI_TRACE - bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); + bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); #endif #ifdef XFS_TRANS_DEBUG @@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks( anyway. */ XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); XFS_BUF_DONE(bp); - XFS_BUF_V_IODONESEMA(bp); + XFS_BUF_FINISH_IOWAIT(bp); } return; } diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 2211e885ef24..760f4c5b5160 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -128,10 +128,8 @@ xfs_swap_extents( xfs_swapext_t *sxp) { xfs_mount_t *mp; - xfs_inode_t *ips[2]; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; - bhv_vnode_t *vp, *tvp; xfs_ifork_t *tempifp, *ifp, *tifp; int ilf_fields, tilf_fields; static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; @@ -150,19 +148,8 @@ xfs_swap_extents( } sbp = &sxp->sx_stat; - vp = XFS_ITOV(ip); - tvp = XFS_ITOV(tip); - - /* Lock in i_ino order */ - if (ip->i_ino < tip->i_ino) { - ips[0] = ip; - ips[1] = tip; - } else { - ips[0] = tip; - ips[1] = ip; - } - xfs_lock_inodes(ips, 2, lock_flags); + xfs_lock_two_inodes(ip, tip, lock_flags); locked = 1; /* Verify that both files have the same format */ @@ -184,7 +171,7 @@ xfs_swap_extents( goto error0; } - if (VN_CACHED(tvp) != 0) { + if (VN_CACHED(VFS_I(tip)) != 0) { xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); @@ -193,7 +180,7 @@ xfs_swap_extents( } /* Verify O_DIRECT for ftmp */ - if (VN_CACHED(tvp) != 0) { + if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto error0; } @@ -237,7 +224,7 @@ xfs_swap_extents( * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ - if (VN_MAPPED(vp)) { + if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto error0; } @@ -265,7 +252,7 @@ xfs_swap_extents( locked = 0; goto error0; } - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks @@ -350,15 +337,11 @@ xfs_swap_extents( break; } - /* - * Increment vnode ref counts since xfs_trans_commit & - * xfs_trans_cancel will both unlock the inodes and - * decrement the associated ref counts. - */ - VN_HOLD(vp); - VN_HOLD(tvp); + IHOLD(ip); xfs_trans_ijoin(tp, ip, lock_flags); + + IHOLD(tip); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -18,7 +18,6 @@ #ifndef __XFS_DMAPI_H__ #define __XFS_DMAPI_H__ -#include <linux/version.h> /* Values used to define the on-disk version of dm_attrname_t. All * on-disk attribute names start with the 8-byte string "SGI_DMI_". * diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index f66756cfb5e8..f227ecd1a294 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -58,9 +58,6 @@ xfs_error_trap(int e) } return e; } -#endif - -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) int xfs_etest[XFS_NUM_INJECT_ERROR]; int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; @@ -154,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) return 0; } -#endif /* DEBUG || INDUCE_IO_ERROR */ +#endif /* DEBUG */ static void xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index d8559d132efa..11543f10b0c6 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -125,22 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) +#ifdef DEBUG extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); #define XFS_NUM_INJECT_ERROR 10 - -#ifdef __ANSI_CPP__ -#define XFS_TEST_ERROR(expr, mp, tag, rf) \ - ((expr) || \ - xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \ - (rf))) -#else #define XFS_TEST_ERROR(expr, mp, tag, rf) \ ((expr) || \ xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ (rf))) -#endif /* __ANSI_CPP__ */ extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); @@ -148,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) #define xfs_errortag_add(tag, mp) (ENOSYS) #define xfs_errortag_clearall(mp, loud) (ENOSYS) -#endif /* (DEBUG || INDUCE_IO_ERROR) */ +#endif /* DEBUG */ /* * XFS panic tags -- allow a call to xfs_cmn_err() be turned into diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index c38fd14fca29..f3bb75da384e 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -400,7 +400,7 @@ xfs_filestream_init(void) if (!item_zone) return -ENOMEM; #ifdef XFS_FILESTREAMS_TRACE - xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); + xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS); #endif return 0; } diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index e5310c90e50f..83502f3edef0 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -181,7 +181,7 @@ xfs_inobt_delrec( * then we can get rid of this level. */ if (numrecs == 1 && level > 0) { - agbp = cur->bc_private.i.agbp; + agbp = cur->bc_private.a.agbp; agi = XFS_BUF_TO_AGI(agbp); /* * pp is still set to the first pointer in the block. @@ -194,7 +194,7 @@ xfs_inobt_delrec( * Free the block. */ if ((error = xfs_free_extent(cur->bc_tp, - XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) + XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1))) return error; xfs_trans_binval(cur->bc_tp, bp); xfs_ialloc_log_agi(cur->bc_tp, agbp, @@ -379,7 +379,7 @@ xfs_inobt_delrec( rrecs = be16_to_cpu(right->bb_numrecs); rbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, lbno, 0, &lbp, + cur->bc_private.a.agno, lbno, 0, &lbp, XFS_INO_BTREE_REF))) return error; left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -401,7 +401,7 @@ xfs_inobt_delrec( lrecs = be16_to_cpu(left->bb_numrecs); lbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, rbno, 0, &rbp, + cur->bc_private.a.agno, rbno, 0, &rbp, XFS_INO_BTREE_REF))) return error; right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -484,7 +484,7 @@ xfs_inobt_delrec( xfs_buf_t *rrbp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, + cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, &rrbp, XFS_INO_BTREE_REF))) return error; rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); @@ -497,7 +497,7 @@ xfs_inobt_delrec( * Free the deleting block. */ if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, - cur->bc_private.i.agno, rbno), 1))) + cur->bc_private.a.agno, rbno), 1))) return error; xfs_trans_binval(cur->bc_tp, rbp); /* @@ -854,7 +854,7 @@ xfs_inobt_lookup( { xfs_agi_t *agi; /* a.g. inode header */ - agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); + agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); agno = be32_to_cpu(agi->agi_seqno); agbno = be32_to_cpu(agi->agi_root); } @@ -1089,7 +1089,7 @@ xfs_inobt_lshift( * Set up the left neighbor as "left". */ if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), + cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), 0, &lbp, XFS_INO_BTREE_REF))) return error; left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -1207,10 +1207,10 @@ xfs_inobt_newroot( /* * Get a block & a buffer. */ - agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); + agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, be32_to_cpu(agi->agi_root)); args.mod = args.minleft = args.alignment = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; @@ -1233,7 +1233,7 @@ xfs_inobt_newroot( */ agi->agi_root = cpu_to_be32(args.agbno); be32_add_cpu(&agi->agi_level, 1); - xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, + xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); /* * At the previous root level there are now two blocks: the old @@ -1376,7 +1376,7 @@ xfs_inobt_rshift( * Set up the right neighbor as "right". */ if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), + cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, &rbp, XFS_INO_BTREE_REF))) return error; right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -1492,7 +1492,7 @@ xfs_inobt_split( * Allocate the new block. * If we can't do it, we're toast. Give up. */ - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno); args.mod = args.minleft = args.alignment = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; @@ -1725,7 +1725,7 @@ xfs_inobt_decrement( agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, agbno, 0, &bp, + cur->bc_private.a.agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) return error; lev--; @@ -1897,7 +1897,7 @@ xfs_inobt_increment( agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, agbno, 0, &bp, + cur->bc_private.a.agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) return error; lev--; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b07604b94d9f..e229e9e001c2 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -216,7 +216,14 @@ finish_inode: mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); init_waitqueue_head(&ip->i_ipin_wait); atomic_set(&ip->i_pincount, 0); - initnsema(&ip->i_flock, 1, "xfsfino"); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); if (lock_flags) xfs_ilock(ip, lock_flags); @@ -288,10 +295,17 @@ finish_inode: *ipp = ip; /* + * Set up the Linux with the Linux inode. + */ + ip->i_vnode = inode; + inode->i_private = ip; + + /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - xfs_initialize_vnode(mp, inode, ip); + if (ip->i_d.di_mode != 0) + xfs_setup_inode(ip); return 0; } @@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip, * Special iput for brand-new inodes that are still locked */ void -xfs_iput_new(xfs_inode_t *ip, - uint lock_flags) +xfs_iput_new( + xfs_inode_t *ip, + uint lock_flags) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); xfs_itrace_entry(ip); @@ -775,26 +790,3 @@ xfs_isilocked( } #endif -/* - * The following three routines simply manage the i_flock - * semaphore embedded in the inode. This semaphore synchronizes - * processes attempting to flush the in-core inode back to disk. - */ -void -xfs_iflock(xfs_inode_t *ip) -{ - psema(&(ip->i_flock), PINOD|PLTWAIT); -} - -int -xfs_iflock_nowait(xfs_inode_t *ip) -{ - return (cpsema(&(ip->i_flock))); -} - -void -xfs_ifunlock(xfs_inode_t *ip) -{ - ASSERT(issemalocked(&(ip->i_flock))); - vsema(&(ip->i_flock)); -} diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index bedc66163176..00e80df9dd9d 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -580,8 +580,8 @@ xfs_iformat_extents( xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); for (i = 0; i < nex; i++, dp++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); - ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); + ep->l0 = get_unaligned_be64(&dp->l0); + ep->l1 = get_unaligned_be64(&dp->l1); } XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); if (whichfork != XFS_DATA_FORK || @@ -835,22 +835,22 @@ xfs_iread( * Do this before xfs_iformat in case it adds entries. */ #ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); #endif #ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_BMBT_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif /* @@ -1046,9 +1046,9 @@ xfs_ialloc( { xfs_ino_t ino; xfs_inode_t *ip; - bhv_vnode_t *vp; uint flags; int error; + timespec_t tv; /* * Call the space management code to pick @@ -1077,13 +1077,12 @@ xfs_ialloc( } ASSERT(ip != NULL); - vp = XFS_ITOV(ip); ip->i_d.di_mode = (__uint16_t)mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; ASSERT(ip->i_d.di_nlink == nlink); - ip->i_d.di_uid = current_fsuid(cr); - ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_uid = current_fsuid(); + ip->i_d.di_gid = current_fsgid(); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); @@ -1130,7 +1129,13 @@ xfs_ialloc( ip->i_size = 0; ip->i_d.di_nextents = 0; ASSERT(ip->i_d.di_nblocks == 0); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); + + nanotime(&tv); + ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; + ip->i_d.di_atime = ip->i_d.di_mtime; + ip->i_d.di_ctime = ip->i_d.di_mtime; + /* * di_gen will have been taken care of in xfs_iread. */ @@ -1220,7 +1225,7 @@ xfs_ialloc( xfs_trans_log_inode(tp, ip, flags); /* now that we have an i_mode we can setup inode ops and unlock */ - xfs_initialize_vnode(tp->t_mountp, vp, ip); + xfs_setup_inode(ip); *ipp = ip; return 0; @@ -1399,7 +1404,6 @@ xfs_itruncate_start( xfs_fsize_t last_byte; xfs_off_t toss_start; xfs_mount_t *mp; - bhv_vnode_t *vp; int error = 0; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); @@ -1408,7 +1412,6 @@ xfs_itruncate_start( (flags == XFS_ITRUNC_MAYBE)); mp = ip->i_mount; - vp = XFS_ITOV(ip); /* wait for the completion of any pending DIOs */ if (new_size < ip->i_size) @@ -1457,7 +1460,7 @@ xfs_itruncate_start( #ifdef DEBUG if (new_size == 0) { - ASSERT(VN_CACHED(vp) == 0); + ASSERT(VN_CACHED(VFS_I(ip)) == 0); } #endif return error; @@ -2630,7 +2633,6 @@ xfs_idestroy( xfs_idestroy_fork(ip, XFS_ATTR_FORK); mrfree(&ip->i_lock); mrfree(&ip->i_iolock); - freesema(&ip->i_flock); #ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); @@ -3048,10 +3050,10 @@ cluster_corrupt_out: /* * xfs_iflush() will write a modified inode's changes out to the * inode's on disk home. The caller must have the inode lock held - * in at least shared mode and the inode flush semaphore must be - * held as well. The inode lock will still be held upon return from + * in at least shared mode and the inode flush completion must be + * active as well. The inode lock will still be held upon return from * the call and the caller is free to unlock it. - * The inode flush lock will be unlocked when the inode reaches the disk. + * The inode flush will be completed when the inode reaches the disk. * The flags indicate how the inode's buffer should be written out. */ int @@ -3070,7 +3072,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3233,7 +3235,7 @@ xfs_iflush_int( #endif ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3465,7 +3467,6 @@ xfs_iflush_all( xfs_mount_t *mp) { xfs_inode_t *ip; - bhv_vnode_t *vp; again: XFS_MOUNT_ILOCK(mp); @@ -3480,14 +3481,13 @@ xfs_iflush_all( continue; } - vp = XFS_ITOV_NULL(ip); - if (!vp) { + if (!VFS_I(ip)) { XFS_MOUNT_IUNLOCK(mp); xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); goto again; } - ASSERT(vn_count(vp) == 0); + ASSERT(vn_count(VFS_I(ip)) == 0); ip = ip->i_mnext; } while (ip != mp->m_inodes); @@ -3707,7 +3707,7 @@ xfs_iext_add_indirect_multi( * (all extents past */ if (nex2) { byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); erp->er_extcount -= nex2; xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); @@ -4007,8 +4007,7 @@ xfs_iext_realloc_direct( ifp->if_u1.if_extents = kmem_realloc(ifp->if_u1.if_extents, rnew_size, - ifp->if_real_bytes, - KM_SLEEP); + ifp->if_real_bytes, KM_NOFS); } if (rnew_size > ifp->if_real_bytes) { memset(&ifp->if_u1.if_extents[ifp->if_bytes / @@ -4067,7 +4066,7 @@ xfs_iext_inline_to_direct( xfs_ifork_t *ifp, /* inode fork pointer */ int new_size) /* number of extents in file */ { - ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); + ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); memset(ifp->if_u1.if_extents, 0, new_size); if (ifp->if_bytes) { memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, @@ -4099,7 +4098,7 @@ xfs_iext_realloc_indirect( } else { ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) kmem_realloc(ifp->if_u1.if_ext_irec, - new_size, size, KM_SLEEP); + new_size, size, KM_NOFS); } } @@ -4341,11 +4340,10 @@ xfs_iext_irec_init( nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(nextents <= XFS_LINEAR_EXTS); - erp = (xfs_ext_irec_t *) - kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); + erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); if (nextents == 0) { - ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); } else if (!ifp->if_real_bytes) { xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { @@ -4393,7 +4391,7 @@ xfs_iext_irec_new( /* Initialize new extent record */ erp = ifp->if_u1.if_ext_irec; - erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); erp[erp_idx].er_extcount = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 17a04b6321ed..1420c49674d7 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -87,8 +87,7 @@ typedef struct xfs_ifork { * Flags for xfs_ichgtime(). */ #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ -#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ /* * Per-fork incore inode flags. @@ -204,7 +203,7 @@ typedef struct xfs_inode { struct xfs_inode *i_mprev; /* ptr to prev inode */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ - bhv_vnode_t *i_vnode; /* vnode backpointer */ + struct inode *i_vnode; /* vnode backpointer */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ @@ -223,7 +222,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ - sema_t i_flock; /* inode flush lock */ + struct completion i_flush; /* inode flush completion q */ atomic_t i_pincount; /* inode pin count */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ spinlock_t i_flags_lock; /* inode i_flags lock */ @@ -263,6 +262,18 @@ typedef struct xfs_inode { #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ (ip)->i_size : (ip)->i_d.di_size; +/* Convert from vfs inode to xfs inode */ +static inline struct xfs_inode *XFS_I(struct inode *inode) +{ + return (struct xfs_inode *)inode->i_private; +} + +/* convert from xfs inode to vfs inode */ +static inline struct inode *VFS_I(struct xfs_inode *ip) +{ + return (struct inode *)ip->i_vnode; +} + /* * i_flags helper functions */ @@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) #define XFS_ITRUNC_DEFINITE 0x1 #define XFS_ITRUNC_MAYBE 0x2 -#define XFS_ITOV(ip) ((ip)->i_vnode) -#define XFS_ITOV_NULL(ip) ((ip)->i_vnode) - /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); -void xfs_iflock(xfs_inode_t *); -int xfs_iflock_nowait(xfs_inode_t *); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); -void xfs_ifunlock(xfs_inode_t *); void xfs_ireclaim(xfs_inode_t *); int xfs_finish_reclaim(xfs_inode_t *, int, int); int xfs_finish_reclaim_all(struct xfs_mount *, int); @@ -522,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); +void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); @@ -570,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; +/* + * Manage the i_flush queue embedded in the inode. This completion + * queue synchronizes processes attempting to flush the in-core + * inode back to disk. + */ +static inline void xfs_iflock(xfs_inode_t *ip) +{ + wait_for_completion(&ip->i_flush); +} + +static inline int xfs_iflock_nowait(xfs_inode_t *ip) +{ + return try_wait_for_completion(&ip->i_flush); +} + +static inline void xfs_ifunlock(xfs_inode_t *ip) +{ + complete(&ip->i_flush); +} + #endif /* __KERNEL__ */ #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 0eee08a32c26..97c7452e2620 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -779,11 +779,10 @@ xfs_inode_item_pushbuf( ASSERT(iip->ili_push_owner == current_pid()); /* - * If flushlock isn't locked anymore, chances are that the - * inode flush completed and the inode was taken off the AIL. - * So, just get out. + * If a flush is not in progress anymore, chances are that the + * inode was taken off the AIL. So, just get out. */ - if (!issemalocked(&(ip->i_flock)) || + if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -805,7 +804,7 @@ xfs_inode_item_pushbuf( * If not, we can flush it async. */ dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - issemalocked(&(ip->i_flock))); + !completion_done(&ip->i_flush)); iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_buftrace("INODE ITEM PUSH", bp); @@ -858,7 +857,7 @@ xfs_inode_item_push( ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); /* * Since we were able to lock the inode's flush lock and * we found it on the AIL, the inode must be dirty. This diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 9a3ef9dcaeb9..cf6754a3c5b3 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -59,7 +59,6 @@ xfs_bulkstat_one_iget( { xfs_icdinode_t *dic; /* dinode core info pointer */ xfs_inode_t *ip; /* incore inode pointer */ - bhv_vnode_t *vp; int error; error = xfs_iget(mp, NULL, ino, @@ -72,7 +71,6 @@ xfs_bulkstat_one_iget( ASSERT(ip != NULL); ASSERT(ip->i_blkno != (xfs_daddr_t)0); - vp = XFS_ITOV(ip); dic = &ip->i_d; /* xfs_iget returns the following without needing @@ -85,7 +83,7 @@ xfs_bulkstat_one_iget( buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; - vn_atime_to_bstime(vp, &buf->bs_atime); + vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 91b00a5686cd..ccba14eb9dbe 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -160,7 +160,7 @@ void xlog_trace_iclog(xlog_in_core_t *iclog, uint state) { if (!iclog->ic_trace) - iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); + iclog->ic_trace = ktrace_alloc(256, KM_NOFS); ktrace_enter(iclog->ic_trace, (void *)((unsigned long)state), (void *)((unsigned long)current_pid()), @@ -336,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp, } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); - } - - /* If this ticket was a permanent reservation and we aren't - * trying to release it, reset the inited flags; so next time - * we write, a start record will be written out. - */ - if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) && - (flags & XFS_LOG_REL_PERM_RESERV) == 0) + /* If this ticket was a permanent reservation and we aren't + * trying to release it, reset the inited flags; so next time + * we write, a start record will be written out. + */ ticket->t_flags |= XLOG_TIC_INITED; + } return lsn; } /* xfs_log_done */ @@ -357,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp, * Asynchronous forces are implemented by setting the WANT_SYNC * bit in the appropriate in-core log and then returning. * - * Synchronous forces are implemented with a semaphore. All callers - * to force a given lsn to disk will wait on a semaphore attached to the + * Synchronous forces are implemented with a signal variable. All callers + * to force a given lsn to disk will wait on a the sv attached to the * specific in-core log. When given in-core log finally completes its * write to disk, that thread will wake up all threads waiting on the - * semaphore. + * sv. */ int _xfs_log_force( @@ -588,12 +585,12 @@ error: * mp - ubiquitous xfs mount point structure */ int -xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) +xfs_log_mount_finish(xfs_mount_t *mp) { int error; if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - error = xlog_recover_finish(mp->m_log, mfsi_flags); + error = xlog_recover_finish(mp->m_log); else { error = 0; ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); @@ -707,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { - sv_wait(&iclog->ic_forcesema, PMEM, + sv_wait(&iclog->ic_force_wait, PMEM, &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); @@ -748,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) || iclog->ic_state == XLOG_STATE_DIRTY || iclog->ic_state == XLOG_STATE_IOERROR) ) { - sv_wait(&iclog->ic_forcesema, PMEM, + sv_wait(&iclog->ic_force_wait, PMEM, &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); @@ -838,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= tic->t_unit_res; - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_write_headq); } @@ -859,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= need_bytes; - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_reserve_headq); } @@ -1285,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); - sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); - sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); + sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); + sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); iclogp = &iclog->ic_next; } @@ -1565,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log) iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { - sv_destroy(&iclog->ic_forcesema); - sv_destroy(&iclog->ic_writesema); + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); #ifdef XFS_LOG_TRACE if (iclog->ic_trace != NULL) { @@ -1976,7 +1973,7 @@ xlog_write(xfs_mount_t * mp, /* Clean iclogs starting from the head. This ordering must be * maintained, so an iclog doesn't become ACTIVE beyond one that * is SYNCING. This is also required to maintain the notion that we use - * a counting semaphore to hold off would be writers to the log when every + * a ordered wait queue to hold off would be writers to the log when every * iclog is trying to sync to disk. * * State Change: DIRTY -> ACTIVE @@ -2240,7 +2237,7 @@ xlog_state_do_callback( xlog_state_clean_log(log); /* wake up threads waiting in xfs_log_force() */ - sv_broadcast(&iclog->ic_forcesema); + sv_broadcast(&iclog->ic_force_wait); iclog = iclog->ic_next; } while (first_iclog != iclog); @@ -2302,8 +2299,7 @@ xlog_state_do_callback( * the second completion goes through. * * Callbacks could take time, so they are done outside the scope of the - * global state machine log lock. Assume that the calls to cvsema won't - * take a long time. At least we know it won't sleep. + * global state machine log lock. */ STATIC void xlog_state_done_syncing( @@ -2339,7 +2335,7 @@ xlog_state_done_syncing( * iclog buffer, we wake them all, one will get to do the * I/O, the others get to wait for the result. */ - sv_broadcast(&iclog->ic_writesema); + sv_broadcast(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2347,11 +2343,9 @@ xlog_state_done_syncing( /* * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must - * sleep. The flush semaphore is set to the number of in-core buffers and - * decremented around disk syncing. Therefore, if all buffers are syncing, - * this semaphore will cause new writes to sleep until a sync completes. - * Otherwise, this code just does p() followed by v(). This approximates - * a sleep/wakeup except we can't race. + * sleep. We wait on the flush queue on the head iclog as that should be + * the first iclog to complete flushing. Hence if all iclogs are syncing, + * we will wait here and all new writes will sleep until a sync completes. * * The in-core logs are used in a circular fashion. They are not used * out-of-order even when an iclog past the head is free. @@ -2508,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log, goto error_return; XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... @@ -2534,7 +2528,7 @@ redo: xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); if (XLOG_FORCED_SHUTDOWN(log)) { spin_lock(&log->l_grant_lock); @@ -2633,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log, if (free_bytes < ntic->t_unit_res) break; free_bytes -= ntic->t_unit_res; - sv_signal(&ntic->t_sema); + sv_signal(&ntic->t_wait); ntic = ntic->t_next; } while (ntic != log->l_write_headq); @@ -2644,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already @@ -2673,7 +2667,7 @@ redo: if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { @@ -2916,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log, * 2. the current iclog is drity, and the previous iclog is in the * active or dirty state. * - * We may sleep (call psema) if: + * We may sleep if: * * 1. the current iclog is not in the active nor dirty state. * 2. the current iclog dirty, and the previous iclog is not in the @@ -3013,7 +3007,7 @@ maybe_sleep: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); + sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3096,7 +3090,7 @@ try_again: XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_prev->ic_writesema, PSWP, + sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, &log->l_icloglock, s); *log_flushed = 1; already_slept = 1; @@ -3116,7 +3110,7 @@ try_again: !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { /* - * Don't wait on the forcesema if we know that we've + * Don't wait on completion if we know that we've * gotten a log write error. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { @@ -3124,7 +3118,7 @@ try_again: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); + sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3180,7 +3174,7 @@ STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket) { - sv_destroy(&ticket->t_sema); + sv_destroy(&ticket->t_wait); kmem_zone_free(xfs_log_ticket_zone, ticket); } /* xlog_ticket_put */ @@ -3270,7 +3264,7 @@ xlog_ticket_get(xlog_t *log, tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; - sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); + sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); xlog_tic_reset_res(tic); @@ -3557,14 +3551,14 @@ xfs_log_force_umount( */ if ((tic = log->l_reserve_headq)) { do { - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_reserve_headq); } if ((tic = log->l_write_headq)) { do { - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_write_headq); } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d1d678ecb63e..d47b91f10822 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, xfs_daddr_t start_block, int num_bblocks); -int xfs_log_mount_finish(struct xfs_mount *mp, int); +int xfs_log_mount_finish(struct xfs_mount *mp); void xfs_log_move_tail(struct xfs_mount *mp, xfs_lsn_t tail_lsn); int xfs_log_notify(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 6245913196b4..c8a5b22ee3e3 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -241,7 +241,7 @@ typedef struct xlog_res { } xlog_res_t; typedef struct xlog_ticket { - sv_t t_sema; /* sleep on this semaphore : 20 */ + sv_t t_wait; /* ticket wait queue : 20 */ struct xlog_ticket *t_next; /* :4|8 */ struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ @@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header { * xlog_rec_header_t into the reserved space. * - ic_data follows, so a write to disk can start at the beginning of * the iclog. - * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. + * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. * - ic_next is the pointer to the next iclog in the ring. * - ic_bp is a pointer to the buffer used to write this incore log to disk. * - ic_log is a pointer back to the global log structure. @@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header { * and move everything else out to subsequent cachelines. */ typedef struct xlog_iclog_fields { - sv_t ic_forcesema; - sv_t ic_writesema; + sv_t ic_force_wait; + sv_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; @@ -377,8 +377,8 @@ typedef struct xlog_in_core { /* * Defines to save our code from this glop. */ -#define ic_forcesema hic_fields.ic_forcesema -#define ic_writesema hic_fields.ic_writesema +#define ic_force_wait hic_fields.ic_force_wait +#define ic_write_wait hic_fields.ic_write_wait #define ic_next hic_fields.ic_next #define ic_prev hic_fields.ic_prev #define ic_bp hic_fields.ic_bp @@ -468,7 +468,7 @@ extern int xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk); extern int xlog_recover(xlog_t *log); -extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); +extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern void xlog_recover_process_iunlinks(xlog_t *log); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 9eb722ec744e..82d46ce69d5f 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3940,8 +3940,7 @@ xlog_recover( */ int xlog_recover_finish( - xlog_t *log, - int mfsi_flags) + xlog_t *log) { /* * Now we're ready to do the transactions needed for the @@ -3969,9 +3968,7 @@ xlog_recover_finish( xfs_log_force(log->l_mp, (xfs_lsn_t)0, (XFS_LOG_FORCE | XFS_LOG_SYNC)); - if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { - xlog_recover_process_iunlinks(log); - } + xlog_recover_process_iunlinks(log); xlog_recover_check_summary(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 6c5d1325e7f6..a4503f5e9497 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -128,7 +128,7 @@ static const struct { * initialized. */ STATIC void -xfs_mount_free( +xfs_free_perag( xfs_mount_t *mp) { if (mp->m_perag) { @@ -139,20 +139,6 @@ xfs_mount_free( kmem_free(mp->m_perag[agno].pagb_list); kmem_free(mp->m_perag); } - - spinlock_destroy(&mp->m_ail_lock); - spinlock_destroy(&mp->m_sb_lock); - mutex_destroy(&mp->m_ilock); - mutex_destroy(&mp->m_growlock); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); - - if (mp->m_fsname != NULL) - kmem_free(mp->m_fsname); - if (mp->m_rtname != NULL) - kmem_free(mp->m_rtname); - if (mp->m_logname != NULL) - kmem_free(mp->m_logname); } /* @@ -704,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) * Update alignment values based on mount options and sb values */ STATIC int -xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) +xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags) { xfs_sb_t *sbp = &(mp->m_sb); - if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { + if (mp->m_dalign) { /* * If stripe unit and stripe width are not multiples * of the fs blocksize turn off alignment. @@ -864,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp) * Check that the data (and log if separate) are an ok size. */ STATIC int -xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) +xfs_check_sizes(xfs_mount_t *mp) { xfs_buf_t *bp; xfs_daddr_t d; @@ -887,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) return error; } - if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && - mp->m_logdev_targp != mp->m_ddev_targp) { + if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); @@ -923,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) */ int xfs_mountfs( - xfs_mount_t *mp, - int mfsi_flags) + xfs_mount_t *mp) { xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; __int64_t update_flags = 0LL; uint quotamount, quotaflags; - int agno; int uuid_mounted = 0; int error = 0; @@ -985,7 +968,7 @@ xfs_mountfs( * allocator alignment is within an ag, therefore ag has * to be aligned at stripe boundary. */ - error = xfs_update_alignment(mp, mfsi_flags, &update_flags); + error = xfs_update_alignment(mp, &update_flags); if (error) goto error1; @@ -1004,8 +987,7 @@ xfs_mountfs( * since a single partition filesystem is identical to a single * partition volume/filesystem. */ - if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && - (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { + if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) { if (xfs_uuid_mount(mp)) { error = XFS_ERROR(EINVAL); goto error1; @@ -1033,7 +1015,7 @@ xfs_mountfs( /* * Check that the data (and log if separate) are an ok size. */ - error = xfs_check_sizes(mp, mfsi_flags); + error = xfs_check_sizes(mp); if (error) goto error1; @@ -1047,13 +1029,6 @@ xfs_mountfs( } /* - * For client case we are done now - */ - if (mfsi_flags & XFS_MFSI_CLIENT) { - return 0; - } - - /* * Copies the low order bits of the timestamp and the randomly * set "sequence" number out of a UUID. */ @@ -1077,8 +1052,10 @@ xfs_mountfs( * Allocate and initialize the per-ag data. */ init_rwsem(&mp->m_peraglock); - mp->m_perag = - kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); + mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), + KM_MAYFAIL); + if (!mp->m_perag) + goto error1; mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); @@ -1190,7 +1167,7 @@ xfs_mountfs( * delayed until after the root and real-time bitmap inodes * were consistently read in. */ - error = xfs_log_mount_finish(mp, mfsi_flags); + error = xfs_log_mount_finish(mp); if (error) { cmn_err(CE_WARN, "XFS: log mount finish failed"); goto error4; @@ -1199,7 +1176,7 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); + error = XFS_QM_MOUNT(mp, quotamount, quotaflags); if (error) goto error4; @@ -1233,12 +1210,7 @@ xfs_mountfs( error3: xfs_log_unmount_dealloc(mp); error2: - for (agno = 0; agno < sbp->sb_agcount; agno++) - if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list); - kmem_free(mp->m_perag); - mp->m_perag = NULL; - /* FALLTHROUGH */ + xfs_free_perag(mp); error1: if (uuid_mounted) uuid_table_remove(&mp->m_sb.sb_uuid); @@ -1246,16 +1218,17 @@ xfs_mountfs( } /* - * xfs_unmountfs - * * This flushes out the inodes,dquots and the superblock, unmounts the * log and makes sure that incore structures are freed. */ -int -xfs_unmountfs(xfs_mount_t *mp) +void +xfs_unmountfs( + struct xfs_mount *mp) { - __uint64_t resblks; - int error = 0; + __uint64_t resblks; + int error; + + IRELE(mp->m_rootip); /* * We can potentially deadlock here if we have an inode cluster @@ -1312,8 +1285,6 @@ xfs_unmountfs(xfs_mount_t *mp) xfs_unmountfs_wait(mp); /* wait for async bufs */ xfs_log_unmount(mp); /* Done! No more fs ops. */ - xfs_freesb(mp); - /* * All inodes from this mount point should be freed. */ @@ -1322,11 +1293,12 @@ xfs_unmountfs(xfs_mount_t *mp) if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) uuid_table_remove(&mp->m_sb.sb_uuid); -#if defined(DEBUG) || defined(INDUCE_IO_ERROR) +#if defined(DEBUG) xfs_errortag_clearall(mp, 0); #endif - xfs_mount_free(mp); - return 0; + xfs_free_perag(mp); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); } STATIC void diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 5269bd6e3df0..f3c1024b1241 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -114,7 +114,7 @@ struct xfs_dqtrxops; struct xfs_quotainfo; typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); -typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); +typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); typedef int (*xfs_qmunmount_t)(struct xfs_mount *); typedef void (*xfs_qmdone_t)(struct xfs_mount *); typedef void (*xfs_dqrele_t)(struct xfs_dquot *); @@ -158,8 +158,8 @@ typedef struct xfs_qmops { #define XFS_QM_INIT(mp, mnt, fl) \ (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) -#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ - (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) +#define XFS_QM_MOUNT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) #define XFS_QM_UNMOUNT(mp) \ (*(mp)->m_qm_ops->xfs_qmunmount)(mp) #define XFS_QM_DONE(mp) \ @@ -442,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, /* * Flags for xfs_mountfs */ -#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */ -#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ -/* XFS_MFSI_RRINODES */ -#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ - /* log recovery */ -#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */ -/* XFS_MFSI_CONVERT_SUNIT */ #define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ #define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) @@ -517,10 +510,10 @@ typedef struct xfs_mod_sb { extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern int xfs_log_sbcount(xfs_mount_t *, uint); -extern int xfs_mountfs(xfs_mount_t *mp, int); +extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); -extern int xfs_unmountfs(xfs_mount_t *); +extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_unmount_flush(xfs_mount_t *, int); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index bf87a5913504..e2f68de16159 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, */ /* - * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. - */ -STATIC int -xfs_lowbit32( - __uint32_t v) -{ - if (v) - return ffs(v) - 1; - return -1; -} - -/* * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ @@ -450,6 +438,7 @@ xfs_rtallocate_extent_near( } bbno = XFS_BITTOBLOCK(mp, bno); i = 0; + ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* * Loop over all bitmap blocks (bbno + i is current block). @@ -618,6 +607,8 @@ xfs_rtallocate_extent_size( xfs_suminfo_t sum; /* summary information for extents */ ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(maxlen != 0); + /* * Loop over all the levels starting with maxlen. * At each level, look at all the bitmap blocks, to see if there @@ -675,6 +666,9 @@ xfs_rtallocate_extent_size( *rtblock = NULLRTBLOCK; return 0; } + ASSERT(minlen != 0); + ASSERT(maxlen != 0); + /* * Loop over sizes, from maxlen down to minlen. * This time, when we do the allocations, allow smaller ones @@ -1961,6 +1955,7 @@ xfs_growfs_rt( nsbp->sb_blocksize * nsbp->sb_rextsize); nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; nrsumsize = diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index b0f31c09a76d..3a82576dde9a 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -314,7 +314,7 @@ xfs_bioerror_relse( * ASYNC buffers. */ XFS_BUF_ERROR(bp, EIO); - XFS_BUF_V_IODONESEMA(bp); + XFS_BUF_FINISH_IOWAIT(bp); } else { xfs_buf_relse(bp); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e4ebddd3c500..4e1c22a23be5 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -43,6 +43,7 @@ #include "xfs_quota.h" #include "xfs_trans_priv.h" #include "xfs_trans_space.h" +#include "xfs_inode_item.h" STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); @@ -253,7 +254,7 @@ _xfs_trans_alloc( tp->t_mountp = mp; tp->t_items_free = XFS_LIC_NUM_SLOTS; tp->t_busy_free = XFS_LBC_NUM_SLOTS; - XFS_LIC_INIT(&(tp->t_items)); + xfs_lic_init(&(tp->t_items)); XFS_LBC_INIT(&(tp->t_busy)); return tp; } @@ -282,7 +283,7 @@ xfs_trans_dup( ntp->t_mountp = tp->t_mountp; ntp->t_items_free = XFS_LIC_NUM_SLOTS; ntp->t_busy_free = XFS_LBC_NUM_SLOTS; - XFS_LIC_INIT(&(ntp->t_items)); + xfs_lic_init(&(ntp->t_items)); XFS_LBC_INIT(&(ntp->t_busy)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -1169,7 +1170,7 @@ xfs_trans_cancel( while (licp != NULL) { lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } @@ -1216,6 +1217,68 @@ xfs_trans_free( kmem_zone_free(xfs_trans_zone, tp); } +/* + * Roll from one trans in the sequence of PERMANENT transactions to + * the next: permanent transactions are only flushed out when + * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon + * as possible to let chunks of it go to the log. So we commit the + * chunk we've been working on and get a new transaction to continue. + */ +int +xfs_trans_roll( + struct xfs_trans **tpp, + struct xfs_inode *dp) +{ + struct xfs_trans *trans; + unsigned int logres, count; + int error; + + /* + * Ensure that the inode is always logged. + */ + trans = *tpp; + xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); + + /* + * Copy the critical parameters from one trans to the next. + */ + logres = trans->t_log_res; + count = trans->t_log_count; + *tpp = xfs_trans_dup(trans); + + /* + * Commit the current transaction. + * If this commit failed, then it'd just unlock those items that + * are not marked ihold. That also means that a filesystem shutdown + * is in progress. The caller takes the responsibility to cancel + * the duplicate transaction that gets returned. + */ + error = xfs_trans_commit(trans, 0); + if (error) + return (error); + + trans = *tpp; + + /* + * Reserve space in the log for th next transaction. + * This also pushes items in the "AIL", the list of logged items, + * out to disk if they are taking up space at the tail of the log + * that we want to use. This requires that either nothing be locked + * across this call, or that anything that is locked be logged in + * the prior and the next transactions. + */ + error = xfs_trans_reserve(trans, 0, logres, 0, + XFS_TRANS_PERM_LOG_RES, count); + /* + * Ensure that the inode is in the new transaction and locked. + */ + if (error) + return error; + + xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); + xfs_trans_ihold(trans, dp); + return 0; +} /* * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). @@ -1253,7 +1316,7 @@ xfs_trans_committed( * Special case the chunk embedded in the transaction. */ licp = &(tp->t_items); - if (!(XFS_LIC_ARE_ALL_FREE(licp))) { + if (!(xfs_lic_are_all_free(licp))) { xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); } @@ -1262,7 +1325,7 @@ xfs_trans_committed( */ licp = licp->lic_next; while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); next_licp = licp->lic_next; kmem_free(licp); @@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed( lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0804207c7391..74c80bd2b0ec 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk { * lic_unused to the right value (0 matches all free). The * lic_descs.lid_index values are set up as each desc is allocated. */ -#define XFS_LIC_INIT(cp) xfs_lic_init(cp) static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) { cp->lic_free = XFS_LIC_FREEMASK; } -#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot) static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) { cp->lic_descs[slot].lid_index = (unsigned char)(slot); } -#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp) static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) { return cp->lic_free & XFS_LIC_FREEMASK; } -#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp) static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) { cp->lic_free = XFS_LIC_FREEMASK; } -#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp) static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) { return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); } -#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot) static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) { return (cp->lic_free & (1 << slot)); } -#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot) static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) { cp->lic_free &= ~(1 << slot); } -#define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot) static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) { cp->lic_free |= 1 << slot; } -#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot) static inline xfs_log_item_desc_t * xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) { return &(cp->lic_descs[slot]); } -#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp) static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) { return (uint)dp->lid_index; @@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) * All of this yields the address of the chunk, which is * cast to a chunk pointer. */ -#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp) static inline xfs_log_item_chunk_t * xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) { @@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index cb0c5839154b..4e855b5ced66 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match( bp = NULL; len = BBTOB(len); licp = &tp->t_items; - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { for (i = 0; i < licp->lic_unused; i++) { /* * Skip unoccupied slots. */ - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); blip = (xfs_buf_log_item_t *)lidp->lid_item; if (blip->bli_item.li_type != XFS_LI_BUF) { continue; @@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all( bp = NULL; len = BBTOB(len); for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { ASSERT(licp == &tp->t_items); ASSERT(licp->lic_next == NULL); return NULL; @@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all( /* * Skip unoccupied slots. */ - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); blip = (xfs_buf_log_item_t *)lidp->lid_item; if (blip->bli_item.li_type != XFS_LI_BUF) { continue; diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index db5c83595526..3c666e8317f8 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) * Initialize the chunk, and then * claim the first slot in the newly allocated chunk. */ - XFS_LIC_INIT(licp); - XFS_LIC_CLAIM(licp, 0); + xfs_lic_init(licp); + xfs_lic_claim(licp, 0); licp->lic_unused = 1; - XFS_LIC_INIT_SLOT(licp, 0); - lidp = XFS_LIC_SLOT(licp, 0); + xfs_lic_init_slot(licp, 0); + lidp = xfs_lic_slot(licp, 0); /* * Link in the new chunk and update the free count. @@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) */ licp = &tp->t_items; while (licp != NULL) { - if (XFS_LIC_VACANCY(licp)) { + if (xfs_lic_vacancy(licp)) { if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { i = licp->lic_unused; - ASSERT(XFS_LIC_ISFREE(licp, i)); + ASSERT(xfs_lic_isfree(licp, i)); break; } for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { - if (XFS_LIC_ISFREE(licp, i)) + if (xfs_lic_isfree(licp, i)) break; } ASSERT(i <= XFS_LIC_MAX_SLOT); @@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) * If we find a free descriptor, claim it, * initialize it, and return it. */ - XFS_LIC_CLAIM(licp, i); + xfs_lic_claim(licp, i); if (licp->lic_unused <= i) { licp->lic_unused = i + 1; - XFS_LIC_INIT_SLOT(licp, i); + xfs_lic_init_slot(licp, i); } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); tp->t_items_free--; lidp->lid_item = lip; lidp->lid_flags = 0; @@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) xfs_log_item_chunk_t *licp; xfs_log_item_chunk_t **licpp; - slot = XFS_LIC_DESC_TO_SLOT(lidp); - licp = XFS_LIC_DESC_TO_CHUNK(lidp); - XFS_LIC_RELSE(licp, slot); + slot = xfs_lic_desc_to_slot(lidp); + licp = xfs_lic_desc_to_chunk(lidp); + xfs_lic_relse(licp, slot); lidp->lid_item->li_desc = NULL; tp->t_items_free++; @@ -154,7 +154,7 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) * Also decrement the transaction structure's count of free items * by the number in a chunk since we are freeing an empty chunk. */ - if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { + if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) { licpp = &(tp->t_items.lic_next); while (*licpp != licp) { ASSERT(*licpp != NULL); @@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp) /* * If it's not in the first chunk, skip to the second. */ - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { licp = licp->lic_next; } /* * Return the first non-free descriptor in the chunk. */ - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); for (i = 0; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); return NULL; @@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) xfs_log_item_chunk_t *licp; int i; - licp = XFS_LIC_DESC_TO_CHUNK(lidp); + licp = xfs_lic_desc_to_chunk(lidp); /* * First search the rest of the chunk. The for loop keeps us * from referencing things beyond the end of the chunk. */ - for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } /* @@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) } licp = licp->lic_next; - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); for (i = 0; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } ASSERT(0); /* NOTREACHED */ @@ -300,9 +300,9 @@ xfs_trans_free_items( /* * Special case the embedded chunk so we don't free it below. */ - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); - XFS_LIC_ALL_FREE(licp); + xfs_lic_all_free(licp); licp->lic_unused = 0; } licp = licp->lic_next; @@ -311,7 +311,7 @@ xfs_trans_free_items( * Unlock each item in each chunk and free the chunks. */ while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); next_licp = licp->lic_next; kmem_free(licp); @@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) /* * Special case the embedded chunk so we don't free. */ - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); } licpp = &(tp->t_items.lic_next); @@ -358,10 +358,10 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) * and free empty chunks. */ while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); next_licp = licp->lic_next; - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { *licpp = next_licp; kmem_free(licp); freed -= XFS_LIC_NUM_SLOTS; @@ -402,7 +402,7 @@ xfs_trans_unlock_chunk( freed = 0; lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } lip = lidp->lid_item; @@ -421,7 +421,7 @@ xfs_trans_unlock_chunk( */ if (!(freeing_chunk) && (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { - XFS_LIC_RELSE(licp, i); + xfs_lic_relse(licp, i); freed++; } } diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 98e5f110ba5f..35d4d414bcc2 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -237,7 +237,7 @@ xfs_droplink( ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; - drop_nlink(ip->i_vnode); + drop_nlink(VFS_I(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = 0; @@ -301,7 +301,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; - inc_nlink(ip->i_vnode); + inc_nlink(VFS_I(ip)); if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f316cb85d8e2..ef321225d269 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -18,9 +18,6 @@ #ifndef __XFS_UTILS_H__ #define __XFS_UTILS_H__ -#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) -#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) - extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, cred_t *, prid_t, int, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 4a9a43315a86..439dd3939dda 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -128,7 +128,6 @@ xfs_unmount_flush( xfs_inode_t *rip = mp->m_rootip; xfs_inode_t *rbmip; xfs_inode_t *rsumip = NULL; - bhv_vnode_t *rvp = XFS_ITOV(rip); int error; xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); @@ -146,7 +145,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out; - ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); + ASSERT(vn_count(VFS_I(rbmip)) == 1); rsumip = mp->m_rsumip; xfs_ilock(rsumip, XFS_ILOCK_EXCL); @@ -157,7 +156,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out; - ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); + ASSERT(vn_count(VFS_I(rsumip)) == 1); } /* @@ -167,7 +166,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out2; - if (vn_count(rvp) != 1 && !relocation) { + if (vn_count(VFS_I(rip)) != 1 && !relocation) { xfs_iunlock(rip, XFS_ILOCK_EXCL); return XFS_ERROR(EBUSY); } @@ -284,7 +283,7 @@ xfs_sync_inodes( int *bypassed) { xfs_inode_t *ip = NULL; - bhv_vnode_t *vp = NULL; + struct inode *vp = NULL; int error; int last_error; uint64_t fflag; @@ -404,7 +403,7 @@ xfs_sync_inodes( continue; } - vp = XFS_ITOV_NULL(ip); + vp = VFS_I(ip); /* * If the vnode is gone then this is being torn down, @@ -479,7 +478,7 @@ xfs_sync_inodes( IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); - ASSERT(vp == XFS_ITOV(ip)); + ASSERT(vp == VFS_I(ip)); ASSERT(ip->i_mount == mp); vnode_refed = B_TRUE; diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 76a1166af822..aa238c8fbd7a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -83,7 +83,7 @@ xfs_setattr( cred_t *credp) { xfs_mount_t *mp = ip->i_mount; - struct inode *inode = XFS_ITOV(ip); + struct inode *inode = VFS_I(ip); int mask = iattr->ia_valid; xfs_trans_t *tp; int code; @@ -182,7 +182,7 @@ xfs_setattr( xfs_ilock(ip, lock_flags); /* boolean: are we the file owner? */ - file_owner = (current_fsuid(credp) == ip->i_d.di_uid); + file_owner = (current_fsuid() == ip->i_d.di_uid); /* * Change various properties of a file. @@ -513,7 +513,6 @@ xfs_setattr( ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; ip->i_update_core = 1; - timeflags &= ~XFS_ICHGTIME_ACC; } if (mask & ATTR_MTIME) { inode->i_mtime = iattr->ia_mtime; @@ -714,7 +713,7 @@ xfs_fsync( return XFS_ERROR(EIO); /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + error = filemap_fdatawait(VFS_I(ip)->i_mapping); if (error) return XFS_ERROR(error); @@ -1160,7 +1159,6 @@ int xfs_release( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; int error; @@ -1195,13 +1193,13 @@ xfs_release( * be exposed to that problem. */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); } if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & @@ -1227,7 +1225,6 @@ int xfs_inactive( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; @@ -1242,7 +1239,7 @@ xfs_inactive( * If the inode is already free, then there can be nothing * to clean up here. */ - if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { + if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) { ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); return VN_INACTIVE_CACHE; @@ -1272,7 +1269,7 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS) && (!(ip->i_d.di_flags & @@ -1536,7 +1533,7 @@ xfs_create( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -1708,111 +1705,6 @@ std_return: } #ifdef DEBUG -/* - * Some counters to see if (and how often) we are hitting some deadlock - * prevention code paths. - */ - -int xfs_rm_locks; -int xfs_rm_lock_delays; -int xfs_rm_attempts; -#endif - -/* - * The following routine will lock the inodes associated with the - * directory and the named entry in the directory. The locks are - * acquired in increasing inode number. - * - * If the entry is "..", then only the directory is locked. The - * vnode ref count will still include that from the .. entry in - * this case. - * - * There is a deadlock we need to worry about. If the locked directory is - * in the AIL, it might be blocking up the log. The next inode we lock - * could be already locked by another thread waiting for log space (e.g - * a permanent log reservation with a long running transaction (see - * xfs_itruncate_finish)). To solve this, we must check if the directory - * is in the ail and use lock_nowait. If we can't lock, we need to - * drop the inode lock on the directory and try again. xfs_iunlock will - * potentially push the tail if we were holding up the log. - */ -STATIC int -xfs_lock_dir_and_entry( - xfs_inode_t *dp, - xfs_inode_t *ip) /* inode of entry 'name' */ -{ - int attempts; - xfs_ino_t e_inum; - xfs_inode_t *ips[2]; - xfs_log_item_t *lp; - -#ifdef DEBUG - xfs_rm_locks++; -#endif - attempts = 0; - -again: - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - - e_inum = ip->i_ino; - - xfs_itrace_ref(ip); - - /* - * We want to lock in increasing inum. Since we've already - * acquired the lock on the directory, we may need to release - * if if the inum of the entry turns out to be less. - */ - if (e_inum > dp->i_ino) { - /* - * We are already in the right order, so just - * lock on the inode of the entry. - * We need to use nowait if dp is in the AIL. - */ - - lp = (xfs_log_item_t *)dp->i_itemp; - if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - attempts++; -#ifdef DEBUG - xfs_rm_attempts++; -#endif - - /* - * Unlock dp and try again. - * xfs_iunlock will try to push the tail - * if the inode is in the AIL. - */ - - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - if ((attempts % 5) == 0) { - delay(1); /* Don't just spin the CPU */ -#ifdef DEBUG - xfs_rm_lock_delays++; -#endif - } - goto again; - } - } else { - xfs_ilock(ip, XFS_ILOCK_EXCL); - } - } else if (e_inum < dp->i_ino) { - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - ips[0] = ip; - ips[1] = dp; - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); - } - /* else e_inum == dp->i_ino */ - /* This can happen if we're asked to lock /x/.. - * the entry is "..", which is also the parent directory. - */ - - return 0; -} - -#ifdef DEBUG int xfs_locked_n; int xfs_small_retries; int xfs_middle_retries; @@ -1946,6 +1838,45 @@ again: #endif } +void +xfs_lock_two_inodes( + xfs_inode_t *ip0, + xfs_inode_t *ip1, + uint lock_mode) +{ + xfs_inode_t *temp; + int attempts = 0; + xfs_log_item_t *lp; + + ASSERT(ip0->i_ino != ip1->i_ino); + + if (ip0->i_ino > ip1->i_ino) { + temp = ip0; + ip0 = ip1; + ip1 = temp; + } + + again: + xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + + /* + * If the first lock we have locked is in the AIL, we must TRY to get + * the second lock. If we can't get it, we must release the first one + * and try again. + */ + lp = (xfs_log_item_t *)ip0->i_itemp; + if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { + xfs_iunlock(ip0, lock_mode); + if ((++attempts % 5) == 0) + delay(1); /* Don't just spin the CPU */ + goto again; + } + } else { + xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); + } +} + int xfs_remove( xfs_inode_t *dp, @@ -2018,9 +1949,7 @@ xfs_remove( goto out_trans_cancel; } - error = xfs_lock_dir_and_entry(dp, ip); - if (error) - goto out_trans_cancel; + xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); /* * At this point, we've gotten both the directory and the entry @@ -2047,9 +1976,6 @@ xfs_remove( } } - /* - * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. - */ XFS_BMAP_INIT(&free_list, &first_block); error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks); @@ -2155,7 +2081,6 @@ xfs_link( { xfs_mount_t *mp = tdp->i_mount; xfs_trans_t *tp; - xfs_inode_t *ips[2]; int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; @@ -2203,15 +2128,7 @@ xfs_link( goto error_return; } - if (sip->i_ino < tdp->i_ino) { - ips[0] = sip; - ips[1] = tdp; - } else { - ips[0] = tdp; - ips[1] = sip; - } - - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); /* * Increment vnode ref counts since xfs_trans_commit & @@ -2352,7 +2269,7 @@ xfs_mkdir( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2578,7 +2495,7 @@ xfs_symlink( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2873,14 +2790,13 @@ int xfs_reclaim( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_itrace_entry(ip); - ASSERT(!VN_MAPPED(vp)); + ASSERT(!VN_MAPPED(VFS_I(ip))); /* bad inode, get out here ASAP */ - if (VN_BAD(vp)) { + if (VN_BAD(VFS_I(ip))) { xfs_ireclaim(ip); return 0; } @@ -2917,7 +2833,7 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); spin_lock(&ip->i_flags_lock); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - vn_to_inode(vp)->i_private = NULL; + VFS_I(ip)->i_private = NULL; ip->i_vnode = NULL; spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); @@ -2933,7 +2849,7 @@ xfs_finish_reclaim( int sync_mode) { xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *vp = VFS_I(ip); if (vp && VN_BAD(vp)) goto reclaim; @@ -3321,7 +3237,6 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -3341,7 +3256,6 @@ xfs_free_file_space( xfs_trans_t *tp; int need_iolock = 1; - vp = XFS_ITOV(ip); mp = ip->i_mount; xfs_itrace_entry(ip); @@ -3378,7 +3292,7 @@ xfs_free_file_space( rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); - if (VN_CACHED(vp) != 0) { + if (VN_CACHED(VFS_I(ip)) != 0) { xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); if (error) |