summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChuck Lever <chuck.lever@oracle.com>2010-05-07 13:34:47 -0400
committerTrond Myklebust <Trond.Myklebust@netapp.com>2010-05-14 15:09:33 -0400
commitff8399709e41bf72b4cb145612a0f9a9f7283c83 (patch)
tree5577163d56e30020d907245f00dcef17730f037b
parentf56916b97fe2031761ca611f0a342efd913afb33 (diff)
downloadtalos-op-linux-ff8399709e41bf72b4cb145612a0f9a9f7283c83.tar.gz
talos-op-linux-ff8399709e41bf72b4cb145612a0f9a9f7283c83.zip
SUNRPC: Replace jiffies-based metrics with ktime-based metrics
Currently RPC performance metrics that tabulate elapsed time use jiffies time values. This is problematic on systems that use slow jiffies (for instance 100HZ systems built for paravirtualized environments). It is also a problem for computing precise latency statistics for advanced network transports, such as InfiniBand, that can have round-trip latencies significanly faster than a single clock tick. For the RPC client, adopt the high resolution time stamp mechanism already used by the network layer and blktrace: ktime. We use ktime format time stamps for all internal computations, and convert to milliseconds for presentation. As a result, we need only addition operations in the performance critical paths; multiply/divide is required only for presentation. We could report RTT metrics in microseconds. In fact the mountstats format is versioned to accomodate exactly this kind of interface improvement. For now, however, we'll stay with millisecond precision for presentation to maintain backwards compatibility with the handful of currently deployed user space tools. At a later point, we'll move to an API such as BDI_STATS where a finer timestamp precision can be reported. Signed-off-by: Chuck Lever <chuck.lever@oracle.com> Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
-rw-r--r--include/linux/sunrpc/metrics.h7
-rw-r--r--include/linux/sunrpc/sched.h5
-rw-r--r--include/linux/sunrpc/xprt.h3
-rw-r--r--net/sunrpc/sched.c2
-rw-r--r--net/sunrpc/stats.c27
-rw-r--r--net/sunrpc/xprt.c8
6 files changed, 24 insertions, 28 deletions
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 77f78e56c481..b6edbc0ea83d 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -26,6 +26,7 @@
#define _LINUX_SUNRPC_METRICS_H
#include <linux/seq_file.h>
+#include <linux/ktime.h>
#define RPC_IOSTATS_VERS "1.0"
@@ -58,9 +59,9 @@ struct rpc_iostats {
* and the total time the request spent from init to release
* are measured.
*/
- unsigned long long om_queue, /* jiffies queued for xmit */
- om_rtt, /* jiffies for RPC RTT */
- om_execute; /* jiffies for RPC execution */
+ ktime_t om_queue, /* queued for xmit */
+ om_rtt, /* RPC RTT */
+ om_execute; /* RPC execution */
} ____cacheline_aligned;
struct rpc_task;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7bc7fd5291ce..76720d262ef2 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -10,6 +10,7 @@
#define _LINUX_SUNRPC_SCHED_H_
#include <linux/timer.h>
+#include <linux/ktime.h>
#include <linux/sunrpc/types.h>
#include <linux/spinlock.h>
#include <linux/wait.h>
@@ -80,8 +81,8 @@ struct rpc_task {
unsigned short tk_timeouts; /* maj timeouts */
size_t tk_bytes_sent; /* total bytes sent */
- unsigned long tk_start; /* RPC task init timestamp */
- long tk_rtt; /* round-trip time (jiffies) */
+ ktime_t tk_start, /* RPC task init timestamp */
+ tk_rtt; /* round-trip time */
pid_t tk_owner; /* Process id for batching tasks */
unsigned char tk_priority : 2;/* Task priority */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 04fc342d9840..f8851861b744 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -13,6 +13,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kref.h>
+#include <linux/ktime.h>
#include <linux/sunrpc/sched.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
@@ -94,7 +95,7 @@ struct rpc_rqst {
*/
u32 rq_bytes_sent; /* Bytes we have sent */
- unsigned long rq_xtime; /* when transmitted */
+ ktime_t rq_xtime; /* transmit time stamp */
int rq_ntrans;
#if defined(CONFIG_NFS_V4_1)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c8979ce5d88a..aa7b07ef5d55 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -834,7 +834,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
}
/* starting timestamp */
- task->tk_start = jiffies;
+ task->tk_start = ktime_get();
dprintk("RPC: new task initialized, procpid %u\n",
task_pid_nr(current));
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 5785d2037f45..aacd95f0dce5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_iostats *stats;
struct rpc_iostats *op_metrics;
- long rtt, execute, queue;
+ ktime_t delta;
if (!task->tk_client || !task->tk_client->cl_metrics || !req)
return;
@@ -159,20 +159,13 @@ void rpc_count_iostats(struct rpc_task *task)
op_metrics->om_bytes_sent += task->tk_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
- queue = (long)req->rq_xtime - task->tk_start;
- if (queue < 0)
- queue = -queue;
- op_metrics->om_queue += queue;
+ delta = ktime_sub(req->rq_xtime, task->tk_start);
+ op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
- rtt = task->tk_rtt;
- if (rtt < 0)
- rtt = -rtt;
- op_metrics->om_rtt += rtt;
+ op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt);
- execute = (long)jiffies - task->tk_start;
- if (execute < 0)
- execute = -execute;
- op_metrics->om_execute += execute;
+ delta = ktime_sub(ktime_get(), task->tk_start);
+ op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
}
static void _print_name(struct seq_file *seq, unsigned int op,
@@ -186,8 +179,6 @@ static void _print_name(struct seq_file *seq, unsigned int op,
seq_printf(seq, "\t%12u: ", op);
}
-#define MILLISECS_PER_JIFFY (1000 / HZ)
-
void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
{
struct rpc_iostats *stats = clnt->cl_metrics;
@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
metrics->om_timeouts,
metrics->om_bytes_sent,
metrics->om_bytes_recv,
- metrics->om_queue * MILLISECS_PER_JIFFY,
- metrics->om_rtt * MILLISECS_PER_JIFFY,
- metrics->om_execute * MILLISECS_PER_JIFFY);
+ ktime_to_ms(metrics->om_queue),
+ ktime_to_ms(metrics->om_rtt),
+ ktime_to_ms(metrics->om_execute));
}
}
EXPORT_SYMBOL_GPL(rpc_print_iostats);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 698c62712294..8986b1b82862 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -43,6 +43,7 @@
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/net.h>
+#include <linux/ktime.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/metrics.h>
@@ -779,10 +780,11 @@ static void xprt_update_rtt(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
unsigned timer = task->tk_msg.rpc_proc->p_timer;
+ long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt));
if (timer) {
if (req->rq_ntrans == 1)
- rpc_update_rtt(rtt, timer, task->tk_rtt);
+ rpc_update_rtt(rtt, timer, m);
rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
}
}
@@ -803,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
task->tk_pid, ntohl(req->rq_xid), copied);
xprt->stat.recvs++;
- task->tk_rtt = (long)jiffies - req->rq_xtime;
+ task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime);
if (xprt->ops->timer != NULL)
xprt_update_rtt(task);
@@ -904,7 +906,7 @@ void xprt_transmit(struct rpc_task *task)
return;
req->rq_connect_cookie = xprt->connect_cookie;
- req->rq_xtime = jiffies;
+ req->rq_xtime = ktime_get();
status = xprt->ops->send_request(task);
if (status != 0) {
task->tk_status = status;
OpenPOWER on IntegriCloud