aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMathieu Desnoyers <mathieu.desnoyers@efficios.com>2011-03-16 19:04:34 -0400
committerMathieu Desnoyers <mathieu.desnoyers@polymtl.ca>2011-03-16 19:04:34 -0400
commit4298d6b069039c7f8e676f3b7499a00a4426971b (patch)
tree8efa05dd5006f8db7b078a2312edbdc1ef0c0688
parent85f84b8888712d234a3746fdf74a9ff36c58a8aa (diff)
lttng-instrumentation/lttng-instrumentation-fs
LTTng instrumentation - fs Core filesystem tracepoints. Tracepoints added: fs_buffer_wait_end fs_buffer_wait_start fs_close fs_exec fs_ioctl fs_llseek fs_lseek fs_open fs_poll fs_pread64 fs_pwrite64 fs_read fs_readv fs_select fs_write fs_writev Instrument the core FS events, extracting the information when it is available. E.g., the instrumentation of "reads" is inserted _after_ the read, when the information is ready. This would not be possible if the instrumentation were placed elsewhere (at the beginning of the system call, for instance). These tracepoints are used by LTTng. About the performance impact of tracepoints (which is comparable to that of markers): even without the immediate values optimization, tests done by Hideo Aoki on ia64 show no regression. His test case used hackbench on a kernel where scheduler instrumentation (about 5 events in the scheduler code) was added. See the "Tracepoints" patch header for detailed performance results. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> CC: Alexander Viro <viro@zeniv.linux.org.uk> CC: Masami Hiramatsu <mhiramat@redhat.com> CC: 'Peter Zijlstra' <peterz@infradead.org> CC: "Frank Ch. Eigler" <fche@redhat.com> CC: 'Ingo Molnar' <mingo@elte.hu> CC: 'Hideo AOKI' <haoki@redhat.com> CC: Takashi Nishiie <t-nishiie@np.css.fujitsu.com> CC: 'Steven Rostedt' <rostedt@goodmis.org> CC: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-rw-r--r--fs/buffer.c6
-rw-r--r--fs/compat.c2
-rw-r--r--fs/exec.c7
-rw-r--r--fs/ioctl.c5
-rw-r--r--fs/open.c6
-rw-r--r--fs/read_write.c28
-rw-r--r--fs/select.c6
-rw-r--r--include/trace/fs.h66
8 files changed, 124 insertions, 2 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2ca..5d0c2c6045c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,11 +41,15 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <trace/fs.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
+DEFINE_TRACE(fs_buffer_wait_start);
+DEFINE_TRACE(fs_buffer_wait_end);
+
inline void
init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
@@ -90,7 +94,9 @@ EXPORT_SYMBOL(unlock_buffer);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
+ trace_fs_buffer_wait_start(bh); /* emitted before waiting on the BH_Lock bit */
wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+ trace_fs_buffer_wait_end(bh); /* emitted once wait_on_bit() has returned */
}
EXPORT_SYMBOL(__wait_on_buffer);
diff --git a/fs/compat.c b/fs/compat.c
index 691c3fd8ce1..933042d14e6 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -50,6 +50,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -1533,6 +1534,7 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..9a92bbe142d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -77,6 +78,11 @@ static atomic_t call_count = ATOMIC_INIT(1);
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
+/*
+ * Also used in compat.c.
+ */
+DEFINE_TRACE(fs_exec);
+
int __register_binfmt(struct linux_binfmt * fmt, int insert)
{
if (!fmt)
@@ -1447,6 +1453,7 @@ int do_execve(const char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b20..a1fecf33b11 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,9 +15,12 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <trace/fs.h>
#include <asm/ioctls.h>
+DEFINE_TRACE(fs_ioctl);
+
/* So that the fiemap access checks can't overflow on 32 bit machines. */
#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
@@ -616,6 +619,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
if (!filp)
goto out;
+ trace_fs_ioctl(fd, cmd, arg);
+
error = security_file_ioctl(filp, cmd, arg);
if (error)
goto out_fput;
diff --git a/fs/open.c b/fs/open.c
index b47aab39c05..575c92f3b81 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,9 +30,13 @@
#include <linux/fs_struct.h>
#include <linux/ima.h>
#include <linux/dnotify.h>
+#include <trace/fs.h>
#include "internal.h"
+DEFINE_TRACE(fs_open);
+DEFINE_TRACE(fs_close);
+
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
struct file *filp)
{
@@ -906,6 +910,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
fsnotify_open(f);
fd_install(fd, f);
}
+ trace_fs_open(fd, tmp);
}
putname(tmp);
}
@@ -995,6 +1000,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
+ trace_fs_close(fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8ad550..6a3f7f9c9db 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <trace/fs.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -30,6 +31,15 @@ const struct file_operations generic_ro_fops = {
EXPORT_SYMBOL(generic_ro_fops);
+DEFINE_TRACE(fs_lseek);
+DEFINE_TRACE(fs_llseek);
+DEFINE_TRACE(fs_read);
+DEFINE_TRACE(fs_write);
+DEFINE_TRACE(fs_pread64);
+DEFINE_TRACE(fs_pwrite64);
+DEFINE_TRACE(fs_readv);
+DEFINE_TRACE(fs_writev);
+
static inline int unsigned_offsets(struct file *file)
{
return file->f_mode & FMODE_UNSIGNED_OFFSET;
@@ -187,6 +197,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
+
+ trace_fs_lseek(fd, offset, origin);
+
fput_light(file, fput_needed);
bad:
return retval;
@@ -214,6 +227,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
origin);
+ trace_fs_llseek(fd, offset, origin);
+
retval = (int)offset;
if (offset >= 0) {
retval = -EFAULT;
@@ -409,6 +424,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_read(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -427,6 +443,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_write(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -447,8 +464,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
+ if (file->f_mode & FMODE_PREAD) {
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_pread64(fd, buf, count, pos, ret);
+ }
+
fput_light(file, fput_needed);
}
@@ -476,8 +496,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
+ if (file->f_mode & FMODE_PWRITE) {
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_pwrite64(fd, buf, count, pos, ret);
+ }
fput_light(file, fput_needed);
}
@@ -736,6 +758,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_readv(file, vec, vlen, &pos);
+ trace_fs_readv(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -757,6 +780,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_writev(file, vec, vlen, &pos);
+ trace_fs_writev(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
diff --git a/fs/select.c b/fs/select.c
index fa13f263924..64c2404f2cc 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
@@ -98,6 +99,9 @@ struct poll_table_page {
#define POLL_TABLE_FULL(table) \
((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
+DEFINE_TRACE(fs_select);
+DEFINE_TRACE(fs_poll);
+
/*
* Ok, Peter made a complicated, but straightforward multiple_wait() function.
* I have rewritten this, taking some shortcuts: This code may not be easy to
@@ -485,6 +489,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
+ trace_fs_select(i, end_time);
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
@@ -774,6 +779,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
file = fget_light(fd, &fput_needed);
mask = POLLNVAL;
if (file != NULL) {
+ trace_fs_poll(fd);
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll) {
if (pwait)
diff --git a/include/trace/fs.h b/include/trace/fs.h
new file mode 100644
index 00000000000..29c4ca696a7
--- /dev/null
+++ b/include/trace/fs.h
@@ -0,0 +1,66 @@
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+/* Core filesystem tracepoints; each is instantiated by DEFINE_TRACE() in fs/. */
+#include <linux/buffer_head.h>
+#include <linux/time.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(fs_buffer_wait_start, /* __wait_on_buffer(): before waiting */
+ TP_PROTO(struct buffer_head *bh),
+ TP_ARGS(bh));
+DECLARE_TRACE(fs_buffer_wait_end, /* __wait_on_buffer(): after waiting */
+ TP_PROTO(struct buffer_head *bh),
+ TP_ARGS(bh));
+DECLARE_TRACE(fs_exec, /* successful exec; do_execve() passes a const name */
+ TP_PROTO(const char *filename),
+ TP_ARGS(filename));
+DECLARE_TRACE(fs_ioctl, /* ioctl syscall: before security_file_ioctl() */
+ TP_PROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+ TP_ARGS(fd, cmd, arg));
+DECLARE_TRACE(fs_open, /* do_sys_open(): before putname() of filename */
+ TP_PROTO(int fd, char *filename),
+ TP_ARGS(fd, filename));
+DECLARE_TRACE(fs_close, /* close syscall: before the fd slot is cleared */
+ TP_PROTO(unsigned int fd),
+ TP_ARGS(fd));
+DECLARE_TRACE(fs_lseek, /* lseek syscall: just before fput_light() */
+ TP_PROTO(unsigned int fd, long offset, unsigned int origin),
+ TP_ARGS(fd, offset, origin));
+DECLARE_TRACE(fs_llseek, /* llseek syscall: offset is vfs_llseek()'s result */
+ TP_PROTO(unsigned int fd, loff_t offset, unsigned int origin),
+ TP_ARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that the memory behind a __user pointer may be modified
+ * concurrently by userspace or by other kernel threads.
+ */
+DECLARE_TRACE(fs_read, /* read syscall: ret is vfs_read()'s result */
+ TP_PROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+ TP_ARGS(fd, buf, count, ret));
+DECLARE_TRACE(fs_write, /* write syscall: ret is vfs_write()'s result */
+ TP_PROTO(unsigned int fd, const char __user *buf, size_t count,
+ ssize_t ret),
+ TP_ARGS(fd, buf, count, ret));
+DECLARE_TRACE(fs_pread64, /* pos is sampled after vfs_read() was given &pos */
+ TP_PROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+ ssize_t ret),
+ TP_ARGS(fd, buf, count, pos, ret));
+DECLARE_TRACE(fs_pwrite64, /* pos is sampled after vfs_write() was given &pos */
+ TP_PROTO(unsigned int fd, const char __user *buf, size_t count,
+ loff_t pos, ssize_t ret),
+ TP_ARGS(fd, buf, count, pos, ret));
+DECLARE_TRACE(fs_readv, /* readv syscall: ret is vfs_readv()'s result */
+ TP_PROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TP_ARGS(fd, vec, vlen, ret));
+DECLARE_TRACE(fs_writev, /* writev syscall: ret is vfs_writev()'s result */
+ TP_PROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TP_ARGS(fd, vec, vlen, ret));
+DECLARE_TRACE(fs_select, /* do_select(): for each fd that fget_light() resolved */
+ TP_PROTO(int fd, struct timespec *end_time),
+ TP_ARGS(fd, end_time));
+DECLARE_TRACE(fs_poll, /* do_pollfd(): when the fd resolves to a file */
+ TP_PROTO(int fd),
+ TP_ARGS(fd));
+#endif /* _TRACE_FS_H */