summary | refs | log | tree | commit | diff | stats
path: root/fs/exofs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/exofs/inode.c')
-rw-r--r-- fs/exofs/inode.c | 546
1 files changed, 299 insertions, 247 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 6c10f7476699..a17e4b733e35 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,88 +37,110 @@
#include "exofs.h"
-#ifdef CONFIG_EXOFS_DEBUG
-# define EXOFS_DEBUG_OBJ_ISIZE 1
-#endif
+#define EXOFS_DBGMSG2(M...) do {} while (0)
+
+enum { BIO_MAX_PAGES_KMALLOC =
+ (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
+ MAX_PAGES_KMALLOC =
+ PAGE_SIZE / sizeof(struct page *),
+};
struct page_collect {
struct exofs_sb_info *sbi;
- struct request_queue *req_q;
struct inode *inode;
unsigned expected_pages;
+ struct exofs_io_state *ios;
- struct bio *bio;
+ struct page **pages;
+ unsigned alloc_pages;
unsigned nr_pages;
unsigned long length;
loff_t pg_first; /* keep 64bit also in 32-arches */
};
static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
- struct inode *inode)
+ struct inode *inode)
{
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
pcol->sbi = sbi;
- pcol->req_q = osd_request_queue(sbi->s_dev);
pcol->inode = inode;
pcol->expected_pages = expected_pages;
- pcol->bio = NULL;
+ pcol->ios = NULL;
+ pcol->pages = NULL;
+ pcol->alloc_pages = 0;
pcol->nr_pages = 0;
pcol->length = 0;
pcol->pg_first = -1;
-
- EXOFS_DBGMSG("_pcol_init ino=0x%lx expected_pages=%u\n", inode->i_ino,
- expected_pages);
}
static void _pcol_reset(struct page_collect *pcol)
{
pcol->expected_pages -= min(pcol->nr_pages, pcol->expected_pages);
- pcol->bio = NULL;
+ pcol->pages = NULL;
+ pcol->alloc_pages = 0;
pcol->nr_pages = 0;
pcol->length = 0;
pcol->pg_first = -1;
- EXOFS_DBGMSG("_pcol_reset ino=0x%lx expected_pages=%u\n",
- pcol->inode->i_ino, pcol->expected_pages);
+ pcol->ios = NULL;
/* this is probably the end of the loop but in writes
* it might not end here. don't be left with nothing
*/
if (!pcol->expected_pages)
- pcol->expected_pages = 128;
+ pcol->expected_pages = MAX_PAGES_KMALLOC;
}
static int pcol_try_alloc(struct page_collect *pcol)
{
- int pages = min_t(unsigned, pcol->expected_pages, BIO_MAX_PAGES);
+ unsigned pages = min_t(unsigned, pcol->expected_pages,
+ MAX_PAGES_KMALLOC);
+
+ if (!pcol->ios) { /* First time allocate io_state */
+ int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
+
+ if (ret)
+ return ret;
+ }
+
+ /* TODO: easily support bio chaining */
+ pages = min_t(unsigned, pages,
+ pcol->sbi->layout.group_width * BIO_MAX_PAGES_KMALLOC);
for (; pages; pages >>= 1) {
- pcol->bio = bio_alloc(GFP_KERNEL, pages);
- if (likely(pcol->bio))
+ pcol->pages = kmalloc(pages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (likely(pcol->pages)) {
+ pcol->alloc_pages = pages;
return 0;
+ }
}
- EXOFS_ERR("Failed to kcalloc expected_pages=%u\n",
+ EXOFS_ERR("Failed to kmalloc expected_pages=%u\n",
pcol->expected_pages);
return -ENOMEM;
}
static void pcol_free(struct page_collect *pcol)
{
- bio_put(pcol->bio);
- pcol->bio = NULL;
+ kfree(pcol->pages);
+ pcol->pages = NULL;
+
+ if (pcol->ios) {
+ exofs_put_io_state(pcol->ios);
+ pcol->ios = NULL;
+ }
}
static int pcol_add_page(struct page_collect *pcol, struct page *page,
unsigned len)
{
- int added_len = bio_add_pc_page(pcol->req_q, pcol->bio, page, len, 0);
- if (unlikely(len != added_len))
+ if (unlikely(pcol->nr_pages >= pcol->alloc_pages))
return -ENOMEM;
- ++pcol->nr_pages;
+ pcol->pages[pcol->nr_pages++] = page;
pcol->length += len;
return 0;
}
@@ -161,32 +183,26 @@ static void update_write_page(struct page *page, int ret)
/* Called at the end of reads, to optionally unlock pages and update their
* status.
*/
-static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
- bool do_unlock)
+static int __readpages_done(struct page_collect *pcol, bool do_unlock)
{
- struct bio_vec *bvec;
int i;
u64 resid;
u64 good_bytes;
u64 length = 0;
- int ret = exofs_check_ok_resid(or, &resid, NULL);
-
- osd_end_request(or);
+ int ret = exofs_check_io(pcol->ios, &resid);
if (likely(!ret))
good_bytes = pcol->length;
- else if (!resid)
- good_bytes = 0;
else
good_bytes = pcol->length - resid;
- EXOFS_DBGMSG("readpages_done(0x%lx) good_bytes=0x%llx"
+ EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
pcol->nr_pages);
- __bio_for_each_segment(bvec, pcol->bio, i, 0) {
- struct page *page = bvec->bv_page;
+ for (i = 0; i < pcol->nr_pages; i++) {
+ struct page *page = pcol->pages[i];
struct inode *inode = page->mapping->host;
int page_stat;
@@ -198,38 +214,37 @@ static int __readpages_done(struct osd_request *or, struct page_collect *pcol,
else
page_stat = ret;
- EXOFS_DBGMSG(" readpages_done(0x%lx, 0x%lx) %s\n",
+ EXOFS_DBGMSG2(" readpages_done(0x%lx, 0x%lx) %s\n",
inode->i_ino, page->index,
page_stat ? "bad_bytes" : "good_bytes");
ret = update_read_page(page, page_stat);
if (do_unlock)
unlock_page(page);
- length += bvec->bv_len;
+ length += PAGE_SIZE;
}
pcol_free(pcol);
- EXOFS_DBGMSG("readpages_done END\n");
+ EXOFS_DBGMSG2("readpages_done END\n");
return ret;
}
/* callback of async reads */
-static void readpages_done(struct osd_request *or, void *p)
+static void readpages_done(struct exofs_io_state *ios, void *p)
{
struct page_collect *pcol = p;
- __readpages_done(or, pcol, true);
+ __readpages_done(pcol, true);
atomic_dec(&pcol->sbi->s_curr_pending);
- kfree(p);
+ kfree(pcol);
}
static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
{
- struct bio_vec *bvec;
int i;
- __bio_for_each_segment(bvec, pcol->bio, i, 0) {
- struct page *page = bvec->bv_page;
+ for (i = 0; i < pcol->nr_pages; i++) {
+ struct page *page = pcol->pages[i];
if (rw == READ)
update_read_page(page, ret);
@@ -238,36 +253,29 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
unlock_page(page);
}
- pcol_free(pcol);
}
static int read_exec(struct page_collect *pcol, bool is_sync)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct osd_obj_id obj = {pcol->sbi->s_pid,
- pcol->inode->i_ino + EXOFS_OBJ_OFF};
- struct osd_request *or = NULL;
+ struct exofs_io_state *ios = pcol->ios;
struct page_collect *pcol_copy = NULL;
- loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
int ret;
- if (!pcol->bio)
+ if (!pcol->pages)
return 0;
/* see comment in _readpage() about sync reads */
WARN_ON(is_sync && (pcol->nr_pages != 1));
- or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- ret = -ENOMEM;
- goto err;
- }
-
- osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
+ ios->pages = pcol->pages;
+ ios->nr_pages = pcol->nr_pages;
+ ios->length = pcol->length;
+ ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
if (is_sync) {
- exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
- return __readpages_done(or, pcol, false);
+ exofs_oi_read(oi, pcol->ios);
+ return __readpages_done(pcol, false);
}
pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
@@ -277,14 +285,16 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
}
*pcol_copy = *pcol;
- ret = exofs_async_op(or, readpages_done, pcol_copy, oi->i_cred);
+ ios->done = readpages_done;
+ ios->private = pcol_copy;
+ ret = exofs_oi_read(oi, ios);
if (unlikely(ret))
goto err;
atomic_inc(&pcol->sbi->s_curr_pending);
- EXOFS_DBGMSG("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
- obj.id, _LLU(i_start), pcol->length);
+ EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
+ ios->obj.id, _LLU(ios->offset), pcol->length);
/* pages ownership was passed to pcol_copy */
_pcol_reset(pcol);
@@ -293,12 +303,10 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
err:
if (!is_sync)
_unlock_pcol_pages(pcol, ret, READ);
- else /* Pages unlocked by caller in sync mode only free bio */
- pcol_free(pcol);
+
+ pcol_free(pcol);
kfree(pcol_copy);
- if (or)
- osd_end_request(or);
return ret;
}
@@ -361,7 +369,7 @@ try_again:
goto try_again;
}
- if (!pcol->bio) {
+ if (!pcol->pages) {
ret = pcol_try_alloc(pcol);
if (unlikely(ret))
goto fail;
@@ -370,12 +378,12 @@ try_again:
if (len != PAGE_CACHE_SIZE)
zero_user(page, len, PAGE_CACHE_SIZE - len);
- EXOFS_DBGMSG(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
+ EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
inode->i_ino, page->index, len);
ret = pcol_add_page(pcol, page, len);
if (ret) {
- EXOFS_DBGMSG("Failed pcol_add_page pages[i]=%p "
+ EXOFS_DBGMSG2("Failed pcol_add_page pages[i]=%p "
"this_len=0x%zx nr_pages=%u length=0x%lx\n",
page, len, pcol->nr_pages, pcol->length);
@@ -419,9 +427,8 @@ static int _readpage(struct page *page, bool is_sync)
_pcol_init(&pcol, 1, page->mapping->host);
- /* readpage_strip might call read_exec(,async) inside at several places
- * but this is safe for is_async=0 since read_exec will not do anything
- * when we have a single page.
+ /* readpage_strip might call read_exec(,is_sync==false) at several
+ * places but not if we have a single page.
*/
ret = readpage_strip(&pcol, page);
if (ret) {
@@ -440,35 +447,30 @@ static int exofs_readpage(struct file *file, struct page *page)
return _readpage(page, false);
}
-/* Callback for osd_write. All writes are asynchronouse */
-static void writepages_done(struct osd_request *or, void *p)
+/* Callback for osd_write. All writes are asynchronous */
+static void writepages_done(struct exofs_io_state *ios, void *p)
{
struct page_collect *pcol = p;
- struct bio_vec *bvec;
int i;
u64 resid;
u64 good_bytes;
u64 length = 0;
+ int ret = exofs_check_io(ios, &resid);
- int ret = exofs_check_ok_resid(or, NULL, &resid);
-
- osd_end_request(or);
atomic_dec(&pcol->sbi->s_curr_pending);
if (likely(!ret))
good_bytes = pcol->length;
- else if (!resid)
- good_bytes = 0;
else
good_bytes = pcol->length - resid;
- EXOFS_DBGMSG("writepages_done(0x%lx) good_bytes=0x%llx"
+ EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
pcol->inode->i_ino, _LLU(good_bytes), pcol->length,
pcol->nr_pages);
- __bio_for_each_segment(bvec, pcol->bio, i, 0) {
- struct page *page = bvec->bv_page;
+ for (i = 0; i < pcol->nr_pages; i++) {
+ struct page *page = pcol->pages[i];
struct inode *inode = page->mapping->host;
int page_stat;
@@ -482,37 +484,27 @@ static void writepages_done(struct osd_request *or, void *p)
update_write_page(page, page_stat);
unlock_page(page);
- EXOFS_DBGMSG(" writepages_done(0x%lx, 0x%lx) status=%d\n",
+ EXOFS_DBGMSG2(" writepages_done(0x%lx, 0x%lx) status=%d\n",
inode->i_ino, page->index, page_stat);
- length += bvec->bv_len;
+ length += PAGE_SIZE;
}
pcol_free(pcol);
kfree(pcol);
- EXOFS_DBGMSG("writepages_done END\n");
+ EXOFS_DBGMSG2("writepages_done END\n");
}
static int write_exec(struct page_collect *pcol)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct osd_obj_id obj = {pcol->sbi->s_pid,
- pcol->inode->i_ino + EXOFS_OBJ_OFF};
- struct osd_request *or = NULL;
+ struct exofs_io_state *ios = pcol->ios;
struct page_collect *pcol_copy = NULL;
- loff_t i_start = pcol->pg_first << PAGE_CACHE_SHIFT;
int ret;
- if (!pcol->bio)
+ if (!pcol->pages)
return 0;
- or = osd_start_request(pcol->sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("write_exec: Faild to osd_start_request()\n");
- ret = -ENOMEM;
- goto err;
- }
-
pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
if (!pcol_copy) {
EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n");
@@ -522,17 +514,22 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
- pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
- osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
- ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
+ ios->pages = pcol_copy->pages;
+ ios->nr_pages = pcol_copy->nr_pages;
+ ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
+ ios->length = pcol_copy->length;
+ ios->done = writepages_done;
+ ios->private = pcol_copy;
+
+ ret = exofs_oi_write(oi, ios);
if (unlikely(ret)) {
- EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
+ EXOFS_ERR("write_exec: exofs_oi_write() Faild\n");
goto err;
}
atomic_inc(&pcol->sbi->s_curr_pending);
- EXOFS_DBGMSG("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
- pcol->inode->i_ino, pcol->pg_first, _LLU(i_start),
+ EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
+ pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
pcol->length);
/* pages ownership was passed to pcol_copy */
_pcol_reset(pcol);
@@ -540,9 +537,9 @@ static int write_exec(struct page_collect *pcol)
err:
_unlock_pcol_pages(pcol, ret, WRITE);
+ pcol_free(pcol);
kfree(pcol_copy);
- if (or)
- osd_end_request(or);
+
return ret;
}
@@ -586,6 +583,9 @@ static int writepage_strip(struct page *page,
if (PageError(page))
ClearPageError(page);
unlock_page(page);
+ EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) "
+ "outside the limits\n",
+ inode->i_ino, page->index);
return 0;
}
}
@@ -600,21 +600,24 @@ try_again:
ret = write_exec(pcol);
if (unlikely(ret))
goto fail;
+
+ EXOFS_DBGMSG("writepage_strip(0x%lx, 0x%lx) Discontinuity\n",
+ inode->i_ino, page->index);
goto try_again;
}
- if (!pcol->bio) {
+ if (!pcol->pages) {
ret = pcol_try_alloc(pcol);
if (unlikely(ret))
goto fail;
}
- EXOFS_DBGMSG(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
+ EXOFS_DBGMSG2(" writepage_strip(0x%lx, 0x%lx) len=0x%zx\n",
inode->i_ino, page->index, len);
ret = pcol_add_page(pcol, page, len);
if (unlikely(ret)) {
- EXOFS_DBGMSG("Failed pcol_add_page "
+ EXOFS_DBGMSG2("Failed pcol_add_page "
"nr_pages=%u total_length=0x%lx\n",
pcol->nr_pages, pcol->length);
@@ -634,6 +637,8 @@ try_again:
return 0;
fail:
+ EXOFS_DBGMSG("Error: writepage_strip(0x%lx, 0x%lx)=>%d\n",
+ inode->i_ino, page->index, ret);
set_bit(AS_EIO, &page->mapping->flags);
unlock_page(page);
return ret;
@@ -652,14 +657,17 @@ static int exofs_writepages(struct address_space *mapping,
wbc->range_end >> PAGE_CACHE_SHIFT;
if (start || end)
- expected_pages = min(end - start + 1, 32L);
+ expected_pages = end - start + 1;
else
expected_pages = mapping->nrpages;
- EXOFS_DBGMSG("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx"
- " m->nrpages=%lu start=0x%lx end=0x%lx\n",
+ if (expected_pages < 32L)
+ expected_pages = 32L;
+
+ EXOFS_DBGMSG2("inode(0x%lx) wbc->start=0x%llx wbc->end=0x%llx "
+ "nrpages=%lu start=0x%lx end=0x%lx expected_pages=%ld\n",
mapping->host->i_ino, wbc->range_start, wbc->range_end,
- mapping->nrpages, start, end);
+ mapping->nrpages, start, end, expected_pages);
_pcol_init(&pcol, expected_pages, mapping->host);
@@ -731,13 +739,28 @@ static int exofs_write_begin_export(struct file *file,
fsdata);
}
+static int exofs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ /* According to comment in simple_write_end i_mutex is held */
+ loff_t i_size = inode->i_size;
+ int ret;
+
+ ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+ if (i_size != inode->i_size)
+ mark_inode_dirty(inode);
+ return ret;
+}
+
const struct address_space_operations exofs_aops = {
.readpage = exofs_readpage,
.readpages = exofs_readpages,
.writepage = exofs_writepage,
.writepages = exofs_writepages,
.write_begin = exofs_write_begin_export,
- .write_end = simple_write_end,
+ .write_end = exofs_write_end,
};
/******************************************************************************
@@ -771,19 +794,28 @@ static int exofs_get_block(struct inode *inode, sector_t iblock,
const struct osd_attr g_attr_logical_length = ATTR_DEF(
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
+static int _do_truncate(struct inode *inode)
+{
+ struct exofs_i_info *oi = exofs_i(inode);
+ loff_t isize = i_size_read(inode);
+ int ret;
+
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+
+ nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
+
+ ret = exofs_oi_truncate(oi, (u64)isize);
+ EXOFS_DBGMSG("(0x%lx) size=0x%llx\n", inode->i_ino, isize);
+ return ret;
+}
+
/*
* Truncate a file to the specified size - all we have to do is set the size
* attribute. We make sure the object exists first.
*/
void exofs_truncate(struct inode *inode)
{
- struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
struct exofs_i_info *oi = exofs_i(inode);
- struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
- struct osd_request *or;
- struct osd_attr attr;
- loff_t isize = i_size_read(inode);
- __be64 newsize;
int ret;
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
@@ -793,22 +825,6 @@ void exofs_truncate(struct inode *inode)
return;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
return;
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
-
- nobh_truncate_page(inode->i_mapping, isize, exofs_get_block);
-
- or = osd_start_request(sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("ERROR: exofs_truncate: osd_start_request failed\n");
- goto fail;
- }
-
- osd_req_set_attributes(or, &obj);
-
- newsize = cpu_to_be64((u64)isize);
- attr = g_attr_logical_length;
- attr.val_ptr = &newsize;
- osd_req_add_set_attr_list(or, &attr, 1);
/* if we are about to truncate an object, and it hasn't been
* created yet, wait
@@ -816,8 +832,7 @@ void exofs_truncate(struct inode *inode)
if (unlikely(wait_obj_created(oi)))
goto fail;
- ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
- osd_end_request(or);
+ ret = _do_truncate(inode);
if (ret)
goto fail;
@@ -845,67 +860,110 @@ int exofs_setattr(struct dentry *dentry, struct iattr *iattr)
return error;
}
+static const struct osd_attr g_attr_inode_file_layout = ATTR_DEF(
+ EXOFS_APAGE_FS_DATA,
+ EXOFS_ATTR_INODE_FILE_LAYOUT,
+ 0);
+static const struct osd_attr g_attr_inode_dir_layout = ATTR_DEF(
+ EXOFS_APAGE_FS_DATA,
+ EXOFS_ATTR_INODE_DIR_LAYOUT,
+ 0);
+
/*
- * Read an inode from the OSD, and return it as is. We also return the size
- * attribute in the 'sanity' argument if we got compiled with debugging turned
- * on.
+ * Read the Linux inode info from the OSD, and return it as is. In exofs the
+ * inode info is in an application specific page/attribute of the osd-object.
*/
static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
- struct exofs_fcb *inode, uint64_t *sanity)
+ struct exofs_fcb *inode)
{
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct osd_request *or;
- struct osd_attr attr;
- struct osd_obj_id obj = {sbi->s_pid,
- oi->vfs_inode.i_ino + EXOFS_OBJ_OFF};
+ struct osd_attr attrs[] = {
+ [0] = g_attr_inode_data,
+ [1] = g_attr_inode_file_layout,
+ [2] = g_attr_inode_dir_layout,
+ };
+ struct exofs_io_state *ios;
+ struct exofs_on_disk_inode_layout *layout;
int ret;
- exofs_make_credential(oi->i_cred, &obj);
-
- or = osd_start_request(sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("exofs_get_inode: osd_start_request failed.\n");
- return -ENOMEM;
+ ret = exofs_get_io_state(&sbi->layout, &ios);
+ if (unlikely(ret)) {
+ EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ return ret;
}
- osd_req_get_attributes(or, &obj);
- /* we need the inode attribute */
- osd_req_add_get_attr_list(or, &g_attr_inode_data, 1);
+ ios->obj.id = exofs_oi_objno(oi);
+ exofs_make_credential(oi->i_cred, &ios->obj);
+ ios->cred = oi->i_cred;
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
- /* we get the size attributes to do a sanity check */
- osd_req_add_get_attr_list(or, &g_attr_logical_length, 1);
-#endif
+ attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+ attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
- ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
- if (ret)
+ ios->in_attr = attrs;
+ ios->in_attr_len = ARRAY_SIZE(attrs);
+
+ ret = exofs_sbi_read(ios);
+ if (unlikely(ret)) {
+ EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
+ _LLU(ios->obj.id), ret);
+ memset(inode, 0, sizeof(*inode));
+ inode->i_mode = 0040000 | (0777 & ~022);
+ /* If the object is lost on the target we might as well allow its
+ * deletion.
+ */
+ if ((ret == -ENOENT) || (ret == -EINVAL))
+ ret = 0;
goto out;
+ }
- attr = g_attr_inode_data;
- ret = extract_attr_from_req(or, &attr);
+ ret = extract_attr_from_ios(ios, &attrs[0]);
if (ret) {
- EXOFS_ERR("exofs_get_inode: extract_attr_from_req failed\n");
+ EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
goto out;
}
+ WARN_ON(attrs[0].len != EXOFS_INO_ATTR_SIZE);
+ memcpy(inode, attrs[0].val_ptr, EXOFS_INO_ATTR_SIZE);
- WARN_ON(attr.len != EXOFS_INO_ATTR_SIZE);
- memcpy(inode, attr.val_ptr, EXOFS_INO_ATTR_SIZE);
+ ret = extract_attr_from_ios(ios, &attrs[1]);
+ if (ret) {
+ EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
+ goto out;
+ }
+ if (attrs[1].len) {
+ layout = attrs[1].val_ptr;
+ if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
+ EXOFS_ERR("%s: unsupported files layout %d\n",
+ __func__, layout->gen_func);
+ ret = -ENOTSUPP;
+ goto out;
+ }
+ }
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
- attr = g_attr_logical_length;
- ret = extract_attr_from_req(or, &attr);
+ ret = extract_attr_from_ios(ios, &attrs[2]);
if (ret) {
- EXOFS_ERR("ERROR: extract attr from or failed\n");
+ EXOFS_ERR("%s: extract_attr of inode_data failed\n", __func__);
goto out;
}
- *sanity = get_unaligned_be64(attr.val_ptr);
-#endif
+ if (attrs[2].len) {
+ layout = attrs[2].val_ptr;
+ if (layout->gen_func != cpu_to_le16(LAYOUT_MOVING_WINDOW)) {
+ EXOFS_ERR("%s: unsupported meta-data layout %d\n",
+ __func__, layout->gen_func);
+ ret = -ENOTSUPP;
+ goto out;
+ }
+ }
out:
- osd_end_request(or);
+ exofs_put_io_state(ios);
return ret;
}
+static void __oi_init(struct exofs_i_info *oi)
+{
+ init_waitqueue_head(&oi->i_wq);
+ oi->i_flags = 0;
+}
/*
* Fill in an inode read from the OSD and set it up for use
*/
@@ -914,7 +972,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
struct exofs_i_info *oi;
struct exofs_fcb fcb;
struct inode *inode;
- uint64_t uninitialized_var(sanity);
int ret;
inode = iget_locked(sb, ino);
@@ -923,13 +980,13 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
if (!(inode->i_state & I_NEW))
return inode;
oi = exofs_i(inode);
+ __oi_init(oi);
/* read the inode from the osd */
- ret = exofs_get_inode(sb, oi, &fcb, &sanity);
+ ret = exofs_get_inode(sb, oi, &fcb);
if (ret)
goto bad_inode;
- init_waitqueue_head(&oi->i_wq);
set_obj_created(oi);
/* copy stuff from on-disk struct to in-memory struct */
@@ -947,15 +1004,6 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_blkbits = EXOFS_BLKSHIFT;
inode->i_generation = le32_to_cpu(fcb.i_generation);
-#ifdef EXOFS_DEBUG_OBJ_ISIZE
- if ((inode->i_size != sanity) &&
- (!exofs_inode_is_fast_symlink(inode))) {
- EXOFS_ERR("WARNING: Size of object from inode and "
- "attributes differ (%lld != %llu)\n",
- inode->i_size, _LLU(sanity));
- }
-#endif
-
oi->i_dir_start_lookup = 0;
if ((inode->i_nlink == 0) && (inode->i_mode == 0)) {
@@ -1020,23 +1068,30 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
* set the obj_created flag so that other methods know that the object exists on
* the OSD.
*/
-static void create_done(struct osd_request *or, void *p)
+static void create_done(struct exofs_io_state *ios, void *p)
{
struct inode *inode = p;
struct exofs_i_info *oi = exofs_i(inode);
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret;
- ret = exofs_check_ok(or);
- osd_end_request(or);
+ ret = exofs_check_io(ios, NULL);
+ exofs_put_io_state(ios);
+
atomic_dec(&sbi->s_curr_pending);
if (unlikely(ret)) {
EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
- _LLU(sbi->s_pid), _LLU(inode->i_ino + EXOFS_OBJ_OFF));
- make_bad_inode(inode);
- } else
- set_obj_created(oi);
+ _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+ /* TODO: When the FS is corrupted, creation can fail because the
+ * object already exists. Get rid of this asynchronous creation; if
+ * the object exists, increment the obj counter and try the next
+ * object until we succeed. All these dangling objects will be made
+ * into lost files by chkfs.exofs
+ */
+ }
+
+ set_obj_created(oi);
atomic_dec(&inode->i_count);
wake_up(&oi->i_wq);
@@ -1051,8 +1106,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
struct inode *inode;
struct exofs_i_info *oi;
struct exofs_sb_info *sbi;
- struct osd_request *or;
- struct osd_obj_id obj;
+ struct exofs_io_state *ios;
int ret;
sb = dir->i_sb;
@@ -1061,8 +1115,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
return ERR_PTR(-ENOMEM);
oi = exofs_i(inode);
+ __oi_init(oi);
- init_waitqueue_head(&oi->i_wq);
set_obj_2bcreated(oi);
sbi = sb->s_fs_info;
@@ -1089,28 +1143,28 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
mark_inode_dirty(inode);
- obj.partition = sbi->s_pid;
- obj.id = inode->i_ino + EXOFS_OBJ_OFF;
- exofs_make_credential(oi->i_cred, &obj);
-
- or = osd_start_request(sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("exofs_new_inode: osd_start_request failed\n");
- return ERR_PTR(-ENOMEM);
+ ret = exofs_get_io_state(&sbi->layout, &ios);
+ if (unlikely(ret)) {
+ EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+ return ERR_PTR(ret);
}
- osd_req_create_object(or, &obj);
+ ios->obj.id = exofs_oi_objno(oi);
+ exofs_make_credential(oi->i_cred, &ios->obj);
/* increment the refcount so that the inode will still be around when we
* reach the callback
*/
atomic_inc(&inode->i_count);
- ret = exofs_async_op(or, create_done, inode, oi->i_cred);
+ ios->done = create_done;
+ ios->private = inode;
+ ios->cred = oi->i_cred;
+ ret = exofs_sbi_create(ios);
if (ret) {
atomic_dec(&inode->i_count);
- osd_end_request(or);
- return ERR_PTR(-EIO);
+ exofs_put_io_state(ios);
+ return ERR_PTR(ret);
}
atomic_inc(&sbi->s_curr_pending);
@@ -1128,11 +1182,11 @@ struct updatei_args {
/*
* Callback function from exofs_update_inode().
*/
-static void updatei_done(struct osd_request *or, void *p)
+static void updatei_done(struct exofs_io_state *ios, void *p)
{
struct updatei_args *args = p;
- osd_end_request(or);
+ exofs_put_io_state(ios);
atomic_dec(&args->sbi->s_curr_pending);
@@ -1148,16 +1202,17 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
- struct osd_request *or;
+ struct exofs_io_state *ios;
struct osd_attr attr;
struct exofs_fcb *fcb;
struct updatei_args *args;
int ret;
args = kzalloc(sizeof(*args), GFP_KERNEL);
- if (!args)
+ if (!args) {
+ EXOFS_DBGMSG("Faild kzalloc of args\n");
return -ENOMEM;
+ }
fcb = &args->fcb;
@@ -1186,18 +1241,16 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
} else
memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
- or = osd_start_request(sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("exofs_update_inode: osd_start_request failed.\n");
- ret = -ENOMEM;
+ ret = exofs_get_io_state(&sbi->layout, &ios);
+ if (unlikely(ret)) {
+ EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
goto free_args;
}
- osd_req_set_attributes(or, &obj);
-
attr = g_attr_inode_data;
attr.val_ptr = fcb;
- osd_req_add_set_attr_list(or, &attr, 1);
+ ios->out_attr_len = 1;
+ ios->out_attr = &attr;
if (!obj_created(oi)) {
EXOFS_DBGMSG("!obj_created\n");
@@ -1206,43 +1259,42 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
EXOFS_DBGMSG("wait_event done\n");
}
- if (do_sync) {
- ret = exofs_sync_op(or, sbi->s_timeout, oi->i_cred);
- osd_end_request(or);
- goto free_args;
- } else {
+ if (!do_sync) {
args->sbi = sbi;
+ ios->done = updatei_done;
+ ios->private = args;
+ }
- ret = exofs_async_op(or, updatei_done, args, oi->i_cred);
- if (ret) {
- osd_end_request(or);
- goto free_args;
- }
+ ret = exofs_oi_write(oi, ios);
+ if (!do_sync && !ret) {
atomic_inc(&sbi->s_curr_pending);
goto out; /* deallocation in updatei_done */
}
+ exofs_put_io_state(ios);
free_args:
kfree(args);
out:
- EXOFS_DBGMSG("ret=>%d\n", ret);
+ EXOFS_DBGMSG("(0x%lx) do_sync=%d ret=>%d\n",
+ inode->i_ino, do_sync, ret);
return ret;
}
-int exofs_write_inode(struct inode *inode, int wait)
+int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- return exofs_update_inode(inode, wait);
+ return exofs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
}
/*
* Callback function from exofs_delete_inode() - don't have much cleaning up to
* do.
*/
-static void delete_done(struct osd_request *or, void *p)
+static void delete_done(struct exofs_io_state *ios, void *p)
{
- struct exofs_sb_info *sbi;
- osd_end_request(or);
- sbi = p;
+ struct exofs_sb_info *sbi = p;
+
+ exofs_put_io_state(ios);
+
atomic_dec(&sbi->s_curr_pending);
}
@@ -1256,8 +1308,7 @@ void exofs_delete_inode(struct inode *inode)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct osd_obj_id obj = {sbi->s_pid, inode->i_ino + EXOFS_OBJ_OFF};
- struct osd_request *or;
+ struct exofs_io_state *ios;
int ret;
truncate_inode_pages(&inode->i_data, 0);
@@ -1274,25 +1325,26 @@ void exofs_delete_inode(struct inode *inode)
clear_inode(inode);
- or = osd_start_request(sbi->s_dev, GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("exofs_delete_inode: osd_start_request failed\n");
+ ret = exofs_get_io_state(&sbi->layout, &ios);
+ if (unlikely(ret)) {
+ EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
return;
}
- osd_req_remove_object(or, &obj);
-
/* if we are deleting an obj that hasn't been created yet, wait */
if (!obj_created(oi)) {
BUG_ON(!obj_2bcreated(oi));
wait_event(oi->i_wq, obj_created(oi));
}
- ret = exofs_async_op(or, delete_done, sbi, oi->i_cred);
+ ios->obj.id = exofs_oi_objno(oi);
+ ios->done = delete_done;
+ ios->private = sbi;
+ ios->cred = oi->i_cred;
+ ret = exofs_sbi_remove(ios);
if (ret) {
- EXOFS_ERR(
- "ERROR: @exofs_delete_inode exofs_async_op failed\n");
- osd_end_request(or);
+ EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
+ exofs_put_io_state(ios);
return;
}
atomic_inc(&sbi->s_curr_pending);
OpenPOWER on IntegriCloud