diff options
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 14 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 68 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 45 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.c | 54 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_req.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 24 |
6 files changed, 69 insertions, 137 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e9654c8d5b62..485ed8c7d623 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -943,8 +943,7 @@ struct drbd_conf { struct drbd_work resync_work, unplug_work, md_sync_work, - delay_probe_work, - uuid_work; + delay_probe_work; struct timer_list resync_timer; struct timer_list md_sync_timer; struct timer_list delay_probe_timer; @@ -1069,7 +1068,6 @@ struct drbd_conf { struct timeval dps_time; /* delay-probes-start-time */ unsigned int dp_volume_last; /* send_cnt of last delay probe */ int c_sync_rate; /* current resync rate after delay_probe magic */ - atomic_t new_c_uuid; }; static inline struct drbd_conf *minor_to_mdev(unsigned int minor) @@ -1476,7 +1474,6 @@ extern int w_e_end_ov_req(struct drbd_conf *, struct drbd_work *, int); extern int w_ov_finished(struct drbd_conf *, struct drbd_work *, int); extern int w_resync_inactive(struct drbd_conf *, struct drbd_work *, int); extern int w_resume_next_sg(struct drbd_conf *, struct drbd_work *, int); -extern int w_io_error(struct drbd_conf *, struct drbd_work *, int); extern int w_send_write_hint(struct drbd_conf *, struct drbd_work *, int); extern int w_make_resync_request(struct drbd_conf *, struct drbd_work *, int); extern int w_send_dblock(struct drbd_conf *, struct drbd_work *, int); @@ -1542,7 +1539,7 @@ static inline void drbd_tcp_nodelay(struct socket *sock) static inline void drbd_tcp_quickack(struct socket *sock) { - int __user val = 1; + int __user val = 2; (void) drbd_setsockopt(sock, SOL_TCP, TCP_QUICKACK, (char __user *)&val, sizeof(val)); } @@ -1728,7 +1725,7 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach, switch (mdev->ldev->dc.on_io_error) { case EP_PASS_ON: if (!forcedetach) { - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Local IO failed in %s." "Passing error on...\n", where); break; @@ -2219,8 +2216,6 @@ static inline int __inc_ap_bio_cond(struct drbd_conf *mdev) return 0; if (test_bit(BITMAP_IO, &mdev->flags)) return 0; - if (atomic_read(&mdev->new_c_uuid)) - return 0; return 1; } @@ -2241,9 +2236,6 @@ static inline void inc_ap_bio(struct drbd_conf *mdev, int count) * to avoid races with the reconnect code, * we need to atomic_inc within the spinlock. */ - if (atomic_read(&mdev->new_c_uuid) && atomic_add_unless(&mdev->new_c_uuid, -1, 1)) - drbd_queue_work_front(&mdev->data.work, &mdev->uuid_work); - spin_lock_irq(&mdev->req_lock); while (!__inc_ap_bio_cond(mdev)) { prepare_to_wait(&mdev->misc_wait, &wait, TASK_UNINTERRUPTIBLE); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index be2d2da9cdba..6b077f93acc6 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1215,18 +1215,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, ns.pdsk == D_OUTDATED)) { if (get_ldev(mdev)) { if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) && - mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE && - !atomic_read(&mdev->new_c_uuid)) - atomic_set(&mdev->new_c_uuid, 2); + mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { + drbd_uuid_new_current(mdev); + drbd_send_uuids(mdev); + } put_ldev(mdev); } } if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) { - /* Diskless peer becomes primary or got connected do diskless, primary peer. */ - if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0 && - !atomic_read(&mdev->new_c_uuid)) - atomic_set(&mdev->new_c_uuid, 2); + if (ns.peer == R_PRIMARY && mdev->ldev->md.uuid[UI_BITMAP] == 0) + drbd_uuid_new_current(mdev); /* D_DISKLESS Peer becomes secondary */ if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) @@ -1350,24 +1349,6 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, drbd_md_sync(mdev); } -static int w_new_current_uuid(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - if (get_ldev(mdev)) { - if (mdev->ldev->md.uuid[UI_BITMAP] == 0) { - drbd_uuid_new_current(mdev); - if (get_net_conf(mdev)) { - drbd_send_uuids(mdev); - put_net_conf(mdev); - } - drbd_md_sync(mdev); - } - put_ldev(mdev); - } - atomic_dec(&mdev->new_c_uuid); - wake_up(&mdev->misc_wait); - - return 1; -} static int drbd_thread_setup(void *arg) { @@ -2291,9 +2272,9 @@ static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket * * with page_count == 0 or PageSlab. */ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { - int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, 0); + int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags); kunmap(page); if (sent == size) mdev->send_cnt += size>>9; @@ -2301,7 +2282,7 @@ static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page, } static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, - int offset, size_t size) + int offset, size_t size, unsigned msg_flags) { mm_segment_t oldfs = get_fs(); int sent, ok; @@ -2314,14 +2295,15 @@ static int _drbd_send_page(struct drbd_conf *mdev, struct page *page, * __page_cache_release a page that would actually still be referenced * by someone, leading to some obscure delayed Oops somewhere else. */ if (disable_sendpage || (page_count(page) < 1) || PageSlab(page)) - return _drbd_no_send_page(mdev, page, offset, size); + return _drbd_no_send_page(mdev, page, offset, size, msg_flags); + msg_flags |= MSG_NOSIGNAL; drbd_update_congested(mdev); set_fs(KERNEL_DS); do { sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page, offset, len, - MSG_NOSIGNAL); + msg_flags); if (sent == -EAGAIN) { if (we_should_drop_the_connection(mdev, mdev->data.socket)) @@ -2350,9 +2332,11 @@ static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_no_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } return 1; @@ -2362,12 +2346,13 @@ static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio) { struct bio_vec *bvec; int i; + /* hint all but last page with MSG_MORE */ __bio_for_each_segment(bvec, bio, i, 0) { if (!_drbd_send_page(mdev, bvec->bv_page, - bvec->bv_offset, bvec->bv_len)) + bvec->bv_offset, bvec->bv_len, + i == bio->bi_vcnt -1 ? 0 : MSG_MORE)) return 0; } - return 1; } @@ -2375,9 +2360,11 @@ static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { struct page *page = e->pages; unsigned len = e->size; + /* hint all but last page with MSG_MORE */ page_chain_for_each(page) { unsigned l = min_t(unsigned, len, PAGE_SIZE); - if (!_drbd_send_page(mdev, page, 0, l)) + if (!_drbd_send_page(mdev, page, 0, l, + page_chain_next(page) ? MSG_MORE : 0)) return 0; len -= l; } @@ -2457,11 +2444,11 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) p.dp_flags = cpu_to_be32(dp_flags); set_bit(UNPLUG_REMOTE, &mdev->flags); ok = (sizeof(p) == - drbd_send(mdev, mdev->data.socket, &p, sizeof(p), MSG_MORE)); + drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0)); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) { if (mdev->net_conf->wire_protocol == DRBD_PROT_A) @@ -2510,11 +2497,11 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, return 0; ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, - sizeof(p), MSG_MORE); + sizeof(p), dgs ? MSG_MORE : 0); if (ok && dgs) { dgb = mdev->int_dig_out; drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb); - ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, MSG_MORE); + ok = drbd_send(mdev, mdev->data.socket, dgb, dgs, 0); } if (ok) ok = _drbd_send_zc_ee(mdev, e); @@ -2708,7 +2695,6 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) atomic_set(&mdev->net_cnt, 0); atomic_set(&mdev->packet_seq, 0); atomic_set(&mdev->pp_in_use, 0); - atomic_set(&mdev->new_c_uuid, 0); mutex_init(&mdev->md_io_mutex); mutex_init(&mdev->data.mutex); @@ -2739,14 +2725,12 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) INIT_LIST_HEAD(&mdev->bm_io_work.w.list); INIT_LIST_HEAD(&mdev->delay_probes); INIT_LIST_HEAD(&mdev->delay_probe_work.list); - INIT_LIST_HEAD(&mdev->uuid_work.list); mdev->resync_work.cb = w_resync_inactive; mdev->unplug_work.cb = w_send_write_hint; mdev->md_sync_work.cb = w_md_sync; mdev->bm_io_work.w.cb = w_bitmap_io; mdev->delay_probe_work.cb = w_delay_probes; - mdev->uuid_work.cb = w_new_current_uuid; init_timer(&mdev->resync_timer); init_timer(&mdev->md_sync_timer); init_timer(&mdev->delay_probe_timer); @@ -3799,7 +3783,7 @@ _drbd_insert_fault(struct drbd_conf *mdev, unsigned int type) if (ret) { fault_count++; - if (printk_ratelimit()) + if (__ratelimit(&drbd_ratelimit_state)) dev_warn(DEV, "***Simulating %s failure\n", _drbd_fault_str(type)); } diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index bc9ab7fb2cc7..dff48701b84d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -42,7 +42,6 @@ #include <linux/unistd.h> #include <linux/vmalloc.h> #include <linux/random.h> -#include <linux/mm.h> #include <linux/string.h> #include <linux/scatterlist.h> #include "drbd_int.h" @@ -571,6 +570,25 @@ static int drbd_recv(struct drbd_conf *mdev, void *buf, size_t size) return rv; } +/* quoting tcp(7): + * On individual connections, the socket buffer size must be set prior to the + * listen(2) or connect(2) calls in order to have it take effect. + * This is our wrapper to do so. + */ +static void drbd_setbufsize(struct socket *sock, unsigned int snd, + unsigned int rcv) +{ + /* open coded SO_SNDBUF, SO_RCVBUF */ + if (snd) { + sock->sk->sk_sndbuf = snd; + sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; + } + if (rcv) { + sock->sk->sk_rcvbuf = rcv; + sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; + } +} + static struct socket *drbd_try_connect(struct drbd_conf *mdev) { const char *what; @@ -592,6 +610,8 @@ static struct socket *drbd_try_connect(struct drbd_conf *mdev) sock->sk->sk_rcvtimeo = sock->sk->sk_sndtimeo = mdev->net_conf->try_connect_int*HZ; + drbd_setbufsize(sock, mdev->net_conf->sndbuf_size, + mdev->net_conf->rcvbuf_size); /* explicitly bind to the configured IP as source IP * for the outgoing connections. @@ -670,6 +690,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_conf *mdev) s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */ s_listen->sk->sk_rcvtimeo = timeo; s_listen->sk->sk_sndtimeo = timeo; + drbd_setbufsize(s_listen, mdev->net_conf->sndbuf_size, + mdev->net_conf->rcvbuf_size); what = "bind before listen"; err = s_listen->ops->bind(s_listen, @@ -856,16 +878,6 @@ retry: sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK; msock->sk->sk_priority = TC_PRIO_INTERACTIVE; - if (mdev->net_conf->sndbuf_size) { - sock->sk->sk_sndbuf = mdev->net_conf->sndbuf_size; - sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK; - } - - if (mdev->net_conf->rcvbuf_size) { - sock->sk->sk_rcvbuf = mdev->net_conf->rcvbuf_size; - sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK; - } - /* NOT YET ... * sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10; * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; @@ -1154,17 +1166,6 @@ int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, unsigned n_bios = 0; unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; - if (atomic_read(&mdev->new_c_uuid)) { - if (atomic_add_unless(&mdev->new_c_uuid, -1, 1)) { - drbd_uuid_new_current(mdev); - drbd_md_sync(mdev); - - atomic_dec(&mdev->new_c_uuid); - wake_up(&mdev->misc_wait); - } - wait_event(mdev->misc_wait, !atomic_read(&mdev->new_c_uuid)); - } - /* In most cases, we will only need one bio. But in case the lower * level restrictions happen to be different at this offset on this * side than those of the sending peer, we may need to submit the diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3397f11d0ba9..654f1ef5cbb0 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -102,32 +102,7 @@ static void _req_is_done(struct drbd_conf *mdev, struct drbd_request *req, const } } - /* if it was a local io error, we want to notify our - * peer about that, and see if we need to - * detach the disk and stuff. - * to avoid allocating some special work - * struct, reuse the request. */ - - /* THINK - * why do we do this not when we detect the error, - * but delay it until it is "done", i.e. possibly - * until the next barrier ack? */ - - if (rw == WRITE && - ((s & RQ_LOCAL_MASK) && !(s & RQ_LOCAL_OK))) { - if (!(req->w.list.next == LIST_POISON1 || - list_empty(&req->w.list))) { - /* DEBUG ASSERT only; if this triggers, we - * probably corrupt the worker list here */ - dev_err(DEV, "req->w.list.next = %p\n", req->w.list.next); - dev_err(DEV, "req->w.list.prev = %p\n", req->w.list.prev); - } - req->w.cb = w_io_error; - drbd_queue_work(&mdev->data.work, &req->w); - /* drbd_req_free() is done in w_io_error */ - } else { - drbd_req_free(req); - } + drbd_req_free(req); } static void queue_barrier(struct drbd_conf *mdev) @@ -453,9 +428,6 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local WRITE failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); - /* and now: check how to handle local io error. */ __drbd_chk_io_error(mdev, FALSE); _req_may_be_done(req, m); put_ldev(mdev); @@ -475,22 +447,21 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, req->rq_state |= RQ_LOCAL_COMPLETED; req->rq_state &= ~RQ_LOCAL_PENDING; - dev_alert(DEV, "Local READ failed sec=%llus size=%u\n", - (unsigned long long)req->sector, req->size); - /* _req_mod(req,to_be_send); oops, recursion... */ D_ASSERT(!(req->rq_state & RQ_NET_MASK)); - req->rq_state |= RQ_NET_PENDING; - inc_ap_pending(mdev); __drbd_chk_io_error(mdev, FALSE); put_ldev(mdev); - /* NOTE: if we have no connection, - * or know the peer has no good data either, - * then we don't actually need to "queue_for_net_read", - * but we do so anyways, since the drbd_io_error() - * and the potential state change to "Diskless" - * needs to be done from process context */ + /* no point in retrying if there is no good remote data, + * or we have no connection. */ + if (mdev->state.pdsk != D_UP_TO_DATE) { + _req_may_be_done(req, m); + break; + } + + /* _req_mod(req,to_be_send); oops, recursion... */ + req->rq_state |= RQ_NET_PENDING; + inc_ap_pending(mdev); /* fall through: _req_mod(req,queue_for_net_read); */ case queue_for_net_read: @@ -600,6 +571,9 @@ void __req_mod(struct drbd_request *req, enum drbd_req_event what, _req_may_be_done(req, m); break; + case read_retry_remote_canceled: + req->rq_state &= ~RQ_NET_QUEUED; + /* fall through, in case we raced with drbd_disconnect */ case connection_lost_while_pending: /* transfer log cleanup after connection loss */ /* assert something? */ diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index 16119d7056cc..02d575d24518 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h @@ -91,6 +91,7 @@ enum drbd_req_event { send_failed, handed_over_to_network, connection_lost_while_pending, + read_retry_remote_canceled, recv_acked_by_peer, write_acked_by_peer, write_acked_by_peer_and_sis, /* and set_in_sync */ diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 727ff6339754..b623ceee2a4a 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -224,9 +224,6 @@ void drbd_endio_pri(struct bio *bio, int error) enum drbd_req_event what; int uptodate = bio_flagged(bio, BIO_UPTODATE); - if (error) - dev_warn(DEV, "p %s: error=%d\n", - bio_data_dir(bio) == WRITE ? "write" : "read", error); if (!error && !uptodate) { dev_warn(DEV, "p %s: setting error to -EIO\n", bio_data_dir(bio) == WRITE ? "write" : "read"); @@ -257,20 +254,6 @@ void drbd_endio_pri(struct bio *bio, int error) complete_master_bio(mdev, &m); } -int w_io_error(struct drbd_conf *mdev, struct drbd_work *w, int cancel) -{ - struct drbd_request *req = container_of(w, struct drbd_request, w); - - /* NOTE: mdev->ldev can be NULL by the time we get here! */ - /* D_ASSERT(mdev->ldev->dc.on_io_error != EP_PASS_ON); */ - - /* the only way this callback is scheduled is from _req_may_be_done, - * when it is done and had a local write error, see comments there */ - drbd_req_free(req); - - return TRUE; -} - int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) { struct drbd_request *req = container_of(w, struct drbd_request, w); @@ -280,12 +263,9 @@ int w_read_retry_remote(struct drbd_conf *mdev, struct drbd_work *w, int cancel) * to give the disk the chance to relocate that block */ spin_lock_irq(&mdev->req_lock); - if (cancel || - mdev->state.conn < C_CONNECTED || - mdev->state.pdsk <= D_INCONSISTENT) { - _req_mod(req, send_canceled); + if (cancel || mdev->state.pdsk != D_UP_TO_DATE) { + _req_mod(req, read_retry_remote_canceled); spin_unlock_irq(&mdev->req_lock); - dev_alert(DEV, "WE ARE LOST. Local IO failure, no peer.\n"); return 1; } spin_unlock_irq(&mdev->req_lock); |