author    John Rigby <john.rigby@linaro.org>  2011-03-09 17:24:48 -0700
committer John Rigby <john.rigby@linaro.org>  2011-03-09 17:24:48 -0700
commit    0ffb81261428df3e9f4d32ed00a6bd385299bfcf (patch)
tree      06bbc160842f90e0243e1b29dacd13242564e380
parent    64b883c9d077d0d0f70685ab67a88dbcd1a72720 (diff)
parent    01355eaa8f6aadbf113d9d75c59accc60c274144 (diff)
Merge remote branch 'aviksil-lttng/linaro' into linux-linaro-2.6.38-alt
-rw-r--r-- Documentation/kernel-parameters.txt | 4
-rw-r--r-- Documentation/markers.txt | 113
-rw-r--r-- Documentation/trace/tracepoints.txt | 2
-rw-r--r-- arch/alpha/include/asm/thread_info.h | 6
-rw-r--r-- arch/arm/Kconfig | 1
-rw-r--r-- arch/arm/Kconfig.debug | 4
-rw-r--r-- arch/arm/include/asm/a.out-core.h | 6
-rw-r--r-- arch/arm/include/asm/system.h | 2
-rw-r--r-- arch/arm/include/asm/thread_info.h | 3
-rw-r--r-- arch/arm/include/asm/trace-clock.h | 1
-rw-r--r-- arch/arm/include/asm/unistd.h | 2
-rw-r--r-- arch/arm/include/asm/user.h | 2
-rw-r--r-- arch/arm/kernel/entry-common.S | 10
-rw-r--r-- arch/arm/kernel/process.c | 9
-rw-r--r-- arch/arm/kernel/ptrace.c | 34
-rw-r--r-- arch/arm/kernel/traps.c | 8
-rw-r--r-- arch/arm/mach-omap2/Makefile | 1
-rw-r--r-- arch/arm/mach-omap2/clkt34xx_dpll3m2.c | 3
-rw-r--r-- arch/arm/mach-omap2/clock34xx.c | 4
-rw-r--r-- arch/arm/mach-omap2/pm34xx.c | 29
-rw-r--r-- arch/arm/mach-omap2/trace-clock.c | 726
-rw-r--r-- arch/arm/plat-omap/Kconfig | 4
-rw-r--r-- arch/arm/plat-omap/counter_32k.c | 5
-rw-r--r-- arch/arm/plat-omap/include/plat/clock.h | 2
-rw-r--r-- arch/arm/plat-omap/include/plat/trace-clock.h | 172
-rw-r--r-- arch/avr32/include/asm/thread_info.h | 23
-rw-r--r-- arch/blackfin/include/asm/thread_info.h | 8
-rw-r--r-- arch/cris/include/asm/thread_info.h | 9
-rw-r--r-- arch/frv/include/asm/thread_info.h | 4
-rw-r--r-- arch/h8300/include/asm/thread_info.h | 16
-rw-r--r-- arch/ia64/include/asm/thread_info.h | 10
-rw-r--r-- arch/ia64/kernel/entry.S | 6
-rw-r--r-- arch/m32r/include/asm/thread_info.h | 10
-rw-r--r-- arch/m68k/include/asm/thread_info.h | 1
-rw-r--r-- arch/mips/Kconfig | 15
-rw-r--r-- arch/mips/include/asm/barrier.h | 6
-rw-r--r-- arch/mips/include/asm/mipsregs.h | 1
-rw-r--r-- arch/mips/include/asm/octeon/trace-clock.h | 43
-rw-r--r-- arch/mips/include/asm/thread_info.h | 4
-rw-r--r-- arch/mips/include/asm/timex.h | 97
-rw-r--r-- arch/mips/include/asm/trace-clock.h | 77
-rw-r--r-- arch/mips/kernel/entry.S | 2
-rw-r--r-- arch/mips/kernel/linux32.c | 5
-rw-r--r-- arch/mips/kernel/process.c | 9
-rw-r--r-- arch/mips/kernel/ptrace.c | 9
-rw-r--r-- arch/mips/kernel/scall32-o32.S | 2
-rw-r--r-- arch/mips/kernel/scall64-64.S | 5
-rw-r--r-- arch/mips/kernel/scall64-n32.S | 4
-rw-r--r-- arch/mips/kernel/scall64-o32.S | 10
-rw-r--r-- arch/mips/kernel/smp.c | 3
-rw-r--r-- arch/mips/kernel/syscall.c | 66
-rw-r--r-- arch/mips/kernel/time.c | 1
-rw-r--r-- arch/mips/kernel/traps.c | 25
-rw-r--r-- arch/mips/kernel/unaligned.c | 6
-rw-r--r-- arch/mips/mm/fault.c | 7
-rw-r--r-- arch/parisc/include/asm/thread_info.h | 2
-rw-r--r-- arch/powerpc/Kconfig | 2
-rw-r--r-- arch/powerpc/include/asm/thread_info.h | 8
-rw-r--r-- arch/powerpc/include/asm/timex.h | 12
-rw-r--r-- arch/powerpc/include/asm/trace-clock.h | 48
-rw-r--r-- arch/powerpc/include/asm/trace.h | 3
-rw-r--r-- arch/powerpc/kernel/irq.c | 4
-rw-r--r-- arch/powerpc/kernel/misc_32.S | 2
-rw-r--r-- arch/powerpc/kernel/misc_64.S | 2
-rw-r--r-- arch/powerpc/kernel/ppc_ksyms.c | 3
-rw-r--r-- arch/powerpc/kernel/process.c | 14
-rw-r--r-- arch/powerpc/kernel/ptrace.c | 8
-rw-r--r-- arch/powerpc/kernel/sys_ppc32.c | 4
-rw-r--r-- arch/powerpc/kernel/time.c | 4
-rw-r--r-- arch/powerpc/kernel/traps.c | 19
-rw-r--r-- arch/powerpc/mm/fault.c | 6
-rw-r--r-- arch/powerpc/platforms/cell/spufs/file.c | 1
-rw-r--r-- arch/powerpc/platforms/cell/spufs/sched.c | 1
-rw-r--r-- arch/s390/include/asm/thread_info.h | 2
-rw-r--r-- arch/s390/kernel/entry.S | 3
-rw-r--r-- arch/s390/kernel/entry64.S | 3
-rw-r--r-- arch/s390/kernel/ptrace.c | 5
-rw-r--r-- arch/s390/kernel/sys_s390.c | 5
-rw-r--r-- arch/s390/kernel/traps.c | 39
-rw-r--r-- arch/s390/mm/fault.c | 10
-rw-r--r-- arch/sh/Kconfig | 3
-rw-r--r-- arch/sh/include/asm/thread_info.h | 9
-rw-r--r-- arch/sh/include/asm/timex.h | 15
-rw-r--r-- arch/sh/include/asm/trace-clock.h | 66
-rw-r--r-- arch/sh/kernel/Makefile | 1
-rw-r--r-- arch/sh/kernel/process_32.c | 5
-rw-r--r-- arch/sh/kernel/process_64.c | 10
-rw-r--r-- arch/sh/kernel/ptrace_32.c | 32
-rw-r--r-- arch/sh/kernel/ptrace_64.c | 4
-rw-r--r-- arch/sh/kernel/sys_sh.c | 3
-rw-r--r-- arch/sh/kernel/trace-clock.c | 55
-rw-r--r-- arch/sh/kernel/traps_32.c | 11
-rw-r--r-- arch/sh/mm/fault_32.c | 60
-rw-r--r-- arch/sparc/Kconfig | 2
-rw-r--r-- arch/sparc/include/asm/thread_info_32.h | 2
-rw-r--r-- arch/sparc/include/asm/thread_info_64.h | 3
-rw-r--r-- arch/sparc/include/asm/timex_64.h | 19
-rw-r--r-- arch/sparc/include/asm/trace-clock.h | 44
-rw-r--r-- arch/sparc/kernel/entry.S | 8
-rw-r--r-- arch/sparc/kernel/process_32.c | 4
-rw-r--r-- arch/sparc/kernel/time_64.c | 3
-rw-r--r-- arch/um/include/asm/thread_info.h | 2
-rw-r--r-- arch/x86/Kconfig | 5
-rw-r--r-- arch/x86/ia32/ipc32.c | 5
-rw-r--r-- arch/x86/include/asm/idle.h | 17
-rw-r--r-- arch/x86/include/asm/irqflags.h | 56
-rw-r--r-- arch/x86/include/asm/kvm-mmutrace.h (renamed from arch/x86/kvm/mmutrace.h) | 4
-rw-r--r-- arch/x86/include/asm/kvm-trace.h (renamed from arch/x86/kvm/trace.h) | 4
-rw-r--r-- arch/x86/include/asm/paravirt.h | 4
-rw-r--r-- arch/x86/include/asm/paravirt_types.h | 1
-rw-r--r-- arch/x86/include/asm/thread_info.h | 9
-rw-r--r-- arch/x86/include/asm/trace-clock.h | 73
-rw-r--r-- arch/x86/include/asm/tsc.h | 18
-rw-r--r-- arch/x86/include/asm/vgtod.h | 1
-rw-r--r-- arch/x86/include/asm/vsyscall.h | 8
-rw-r--r-- arch/x86/kernel/Makefile | 4
-rw-r--r-- arch/x86/kernel/apic/apic.c | 7
-rw-r--r-- arch/x86/kernel/apm_32.c | 9
-rw-r--r-- arch/x86/kernel/asm-offsets_32.c | 1
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c | 1
-rw-r--r-- arch/x86/kernel/cpu/common.c | 1
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 3
-rw-r--r-- arch/x86/kernel/dumpstack.c | 7
-rw-r--r-- arch/x86/kernel/entry_32.S | 30
-rw-r--r-- arch/x86/kernel/entry_64.S | 45
-rw-r--r-- arch/x86/kernel/paravirt.c | 3
-rw-r--r-- arch/x86/kernel/paravirt_patch_32.c | 6
-rw-r--r-- arch/x86/kernel/paravirt_patch_64.c | 6
-rw-r--r-- arch/x86/kernel/process.c | 9
-rw-r--r-- arch/x86/kernel/process_32.c | 43
-rw-r--r-- arch/x86/kernel/process_64.c | 36
-rw-r--r-- arch/x86/kernel/ptrace.c | 8
-rw-r--r-- arch/x86/kernel/syscall_64.c | 18
-rw-r--r-- arch/x86/kernel/trace-clock.c | 302
-rw-r--r-- arch/x86/kernel/traps.c | 72
-rw-r--r-- arch/x86/kernel/tsc_sync.c | 198
-rw-r--r-- arch/x86/kernel/vsyscall_64.c | 14
-rw-r--r-- arch/x86/kvm/i8259.c | 2
-rw-r--r-- arch/x86/kvm/lapic.c | 2
-rw-r--r-- arch/x86/kvm/mmu.c | 2
-rw-r--r-- arch/x86/kvm/svm.c | 2
-rw-r--r-- arch/x86/kvm/vmx.c | 2
-rw-r--r-- arch/x86/kvm/x86.c | 2
-rw-r--r-- arch/x86/lguest/boot.c | 1
-rw-r--r-- arch/x86/mm/fault.c | 10
-rw-r--r-- arch/x86/mm/tlb.c | 4
-rw-r--r-- arch/x86/vdso/vclock_gettime.c | 48
-rw-r--r-- arch/x86/xen/enlighten.c | 1
-rw-r--r-- arch/xtensa/include/asm/thread_info.h | 4
-rw-r--r-- drivers/idle/i7300_idle.c | 5
-rw-r--r-- drivers/input/input.c | 3
-rw-r--r-- drivers/net/wan/hd64570.c | 20
-rw-r--r-- drivers/net/wan/hd64572.c | 18
-rw-r--r-- fs/buffer.c | 6
-rw-r--r-- fs/compat.c | 2
-rw-r--r-- fs/exec.c | 7
-rw-r--r-- fs/ioctl.c | 5
-rw-r--r-- fs/open.c | 6
-rw-r--r-- fs/read_write.c | 28
-rw-r--r-- fs/select.c | 47
-rw-r--r-- fs/seq_file.c | 44
-rw-r--r-- fs/splice.c | 1
-rw-r--r-- include/asm-generic/trace-clock.h | 76
-rw-r--r-- include/asm-generic/vmlinux.lds.h | 6
-rw-r--r-- include/linux/Kbuild | 1
-rw-r--r-- include/linux/align.h | 66
-rw-r--r-- include/linux/idle.h | 19
-rw-r--r-- include/linux/immediate.h | 93
-rw-r--r-- include/linux/irqnr.h | 1
-rw-r--r-- include/linux/kernel.h | 9
-rw-r--r-- include/linux/kvm_host.h | 1
-rw-r--r-- include/linux/ltt-channels.h | 108
-rw-r--r-- include/linux/ltt-core.h | 58
-rw-r--r-- include/linux/marker.h | 273
-rw-r--r-- include/linux/module.h | 23
-rw-r--r-- include/linux/netdevice.h | 1
-rw-r--r-- include/linux/poll.h | 2
-rw-r--r-- include/linux/rculist.h | 27
-rw-r--r-- include/linux/sched.h | 3
-rw-r--r-- include/linux/seq_file.h | 21
-rw-r--r-- include/linux/swap.h | 7
-rw-r--r-- include/linux/swapops.h | 8
-rw-r--r-- include/linux/time.h | 2
-rw-r--r-- include/linux/trace-clock.h | 17
-rw-r--r-- include/trace/events/timer.h | 6
-rw-r--r-- include/trace/fault.h | 25
-rw-r--r-- include/trace/filemap.h | 19
-rw-r--r-- include/trace/fs.h | 66
-rw-r--r-- include/trace/hugetlb.h | 28
-rw-r--r-- include/trace/ipc.h | 18
-rw-r--r-- include/trace/ipv4.h | 14
-rw-r--r-- include/trace/ipv6.h | 14
-rw-r--r-- include/trace/irq.h | 31
-rw-r--r-- include/trace/kernel.h | 31
-rw-r--r-- include/trace/lockdep.h | 37
-rw-r--r-- include/trace/net.h | 40
-rw-r--r-- include/trace/page_alloc.h | 16
-rw-r--r-- include/trace/pm.h | 11
-rw-r--r-- include/trace/rcu.h | 43
-rw-r--r-- include/trace/sched.h | 11
-rw-r--r-- include/trace/socket.h | 77
-rw-r--r-- include/trace/swap.h | 20
-rw-r--r-- include/trace/syscall.h | 8
-rw-r--r-- include/trace/timer.h | 24
-rw-r--r-- include/trace/trap.h | 11
-rw-r--r-- init/Kconfig | 45
-rw-r--r-- ipc/msg.c | 8
-rw-r--r-- ipc/sem.c | 8
-rw-r--r-- ipc/shm.c | 8
-rw-r--r-- ipc/syscall.c | 5
-rw-r--r-- kernel/Makefile | 4
-rw-r--r-- kernel/exit.c | 4
-rw-r--r-- kernel/fork.c | 10
-rw-r--r-- kernel/irq/handle.c | 8
-rw-r--r-- kernel/irq/irqdesc.c | 2
-rw-r--r-- kernel/itimer.c | 7
-rw-r--r-- kernel/kexec.c | 8
-rw-r--r-- kernel/lockdep.c | 22
-rw-r--r-- kernel/ltt-channels.c | 388
-rw-r--r-- kernel/marker.c | 1262
-rw-r--r-- kernel/module.c | 110
-rw-r--r-- kernel/notifier.c | 31
-rw-r--r-- kernel/panic.c | 7
-rw-r--r-- kernel/printk.c | 7
-rw-r--r-- kernel/rcutree.c | 7
-rw-r--r-- kernel/sched.c | 54
-rw-r--r-- kernel/softirq.c | 27
-rw-r--r-- kernel/time/Makefile | 1
-rw-r--r-- kernel/time/tsc-sync.c | 313
-rw-r--r-- kernel/timer.c | 16
-rw-r--r-- kernel/trace/Makefile | 2
-rw-r--r-- kernel/trace/trace-clock-32-to-64.c | 295
-rw-r--r-- kernel/trace/trace-clock.c | 97
-rw-r--r-- kernel/trace/trace_printk.c | 1
-rw-r--r-- mm/filemap.c | 10
-rw-r--r-- mm/hugetlb.c | 63
-rw-r--r-- mm/memory.c | 7
-rw-r--r-- mm/page_alloc.c | 5
-rw-r--r-- mm/page_io.c | 4
-rw-r--r-- mm/swapfile.c | 32
-rw-r--r-- mm/vmalloc.c | 2
-rw-r--r-- net/core/dev.c | 18
-rw-r--r-- net/ipv4/devinet.c | 6
-rw-r--r-- net/ipv4/tcp_ipv4.c | 6
-rw-r--r-- net/ipv4/udp.c | 5
-rw-r--r-- net/ipv6/addrconf.c | 7
-rw-r--r-- net/socket.c | 77
-rw-r--r-- samples/Kconfig | 6
-rw-r--r-- samples/Makefile | 2
-rw-r--r-- samples/markers/Makefile | 4
-rw-r--r-- samples/markers/marker-example.c | 53
-rw-r--r-- samples/markers/probe-example.c | 94
-rw-r--r-- samples/markers/test-multi.c | 116
-rw-r--r-- scripts/Makefile.modpost | 12
-rw-r--r-- scripts/mod/modpost.c | 165
-rw-r--r-- scripts/mod/modpost.h | 3
256 files changed, 7891 insertions, 443 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f4a04c0c7ed..d46f541baf8 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -794,6 +794,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that can be changed at run time by the
set_graph_function file in the debugfs tracing directory.
+ force_tsc_sync=1
+ Force TSC resynchronization when SMP CPUs go online.
+ See also idle=poll; frequency scaling should be disabled as well.
+
gamecon.map[2|3]=
[HW,JOY] Multisystem joystick and NES/SNES/PSX pad
support via parallel port (up to 5 devices per port)
diff --git a/Documentation/markers.txt b/Documentation/markers.txt
new file mode 100644
index 00000000000..e25df7cd03b
--- /dev/null
+++ b/Documentation/markers.txt
@@ -0,0 +1,113 @@
+ Using the Linux Kernel Markers
+
+ Mathieu Desnoyers
+
+
+This document introduces Linux Kernel Markers and their use. It shows how to
+insert markers in the kernel, how to connect probe functions to them, and
+gives some examples of probe functions.
+
+
+* Purpose of markers
+
+A marker placed in code provides a hook to call a function (probe) that you can
+provide at runtime. A marker can be "on" (a probe is connected to it) or "off"
+(no probe is attached). When a marker is "off" it has no effect, except for
+a tiny time penalty (checking a condition for a branch) and a small space
+penalty (a few bytes for the function call at the end of the instrumented
+function, plus a data structure in a separate section). The
+immediate values are used to minimize the impact on data cache, encoding the
+condition in the instruction stream. When a marker is "on", the function you
+provide is called each time the marker is executed, in the execution context of
+the caller. When the function provided ends its execution, it returns to the
+caller (continuing from the marker site).
+
+You can put markers at important locations in the code. Markers are
+lightweight hooks that can pass an arbitrary number of parameters,
+described in a printk-like format string, to the attached probe function.
+
+They can be used for tracing and performance accounting.
+
+
+* Usage
+
+In order to use the macro trace_mark, you should include linux/marker.h.
+
+#include <linux/marker.h>
+
+And,
+
+trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring);
+Where :
+- subsystem_event is an identifier unique to your event
+ - subsystem is the name of your subsystem.
+ - event is the name of the event to mark.
+- "myint %d mystring %s" is the format string for the serializer. "myint" and
+ "mystring" are respectively the field names associated with the first and
+ second parameter.
+- someint is an integer.
+- somestring is a char pointer.
+
+A function (probe) is connected to a marker by registering it for that
+specific marker through marker_probe_register(); the marker is then activated
+by calling marker_arm(). A marker is deactivated by calling marker_disarm() as
+many times as marker_arm() has been called. Removing a probe is done through
+marker_probe_unregister(); it will disarm the probe.
+
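+A minimal registration sketch (the exact marker_probe_register() and probe
+prototypes below are assumptions that vary between marker API versions; see
+linux/marker.h and samples/markers/probe-example.c for the variant in your
+tree) :
+
+#include <linux/module.h>
+#include <linux/marker.h>
+
+/* Probe matching the "myint %d mystring %s" format string used above. */
+static void probe_subsystem_event(void *probe_private, void *call_private,
+ const char *fmt, va_list *args)
+{
+ int myint = va_arg(*args, int);
+ const char *mystring = va_arg(*args, const char *);
+
+ printk(KERN_DEBUG "subsystem_event: myint %d mystring %s\n",
+ myint, mystring);
+}
+
+static int __init probe_init(void)
+{
+ int ret;
+
+ ret = marker_probe_register("subsystem_event",
+ "myint %d mystring %s",
+ probe_subsystem_event, NULL);
+ if (ret)
+ return ret;
+ return marker_arm("subsystem_event");
+}
+module_init(probe_init);
+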
+marker_synchronize_unregister() must be called between probe unregistration and
+the first occurrence of
+- the end of the module exit function,
+ to make sure there is no caller left using the probe;
+- the freeing of any resource used by the probes,
+ to make sure the probes won't be accessing invalid data.
+This, and the fact that preemption is disabled around the probe call, make sure
+that probe removal and module unload are safe. See the "Probe example" section
+below for a sample probe module.
+
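+For instance, a module exit path honouring this rule could look like the
+following sketch (same assumed prototypes as above) :
+
+static void __exit probe_fini(void)
+{
+ marker_probe_unregister("subsystem_event");
+ /*
+ * Wait for in-flight probe callers to finish before the module text
+ * and any probe-private data go away.
+ */
+ marker_synchronize_unregister();
+}
+module_exit(probe_fini);
+MODULE_LICENSE("GPL");
+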
+The marker mechanism supports inserting multiple instances of the same marker.
+Markers can be put in inline functions, inlined static functions, and
+unrolled loops as well as regular functions.
+
+The naming scheme "subsystem_event" is suggested here as a convention intended
+to limit collisions. Marker names are global to the kernel: a name refers to
+the same marker whether it appears in the core kernel image or in modules.
+Markers sharing a name but having conflicting format strings are detected as
+mismatched; such markers are not armed, and a printk warning identifying the
+inconsistency is output:
+
+"Format mismatch for probe probe_name (format), marker (format)"
+
+Another way to use markers is to simply define the marker without generating
+any function call site for it. This is useful in combination with tracepoint
+probes in a scheme like this :
+
+void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk);
+
+DEFINE_MARKER_TP(marker_channel, marker_eventname, tracepoint_name,
+ probe_tracepoint_name, "arg1 %u pid %d");
+
+notrace void probe_tracepoint_name(unsigned int arg1, struct task_struct *tsk)
+{
+ struct marker *marker = &GET_MARKER(marker_channel, marker_eventname);
+ /* write data to trace buffers ... */
+}
+
+* Optimization for a given architecture
+
+To force use of a non-optimized version of the markers, _trace_mark() should be
+used. It takes the same parameters as the normal markers, but it does not use
+the immediate values based on code patching.
+
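+For instance, reusing the earlier example :
+
+_trace_mark(subsystem_event, "myint %d mystring %s", someint, somestring);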
+
+* Probe / marker example
+
+See the examples provided in samples/markers/
+
+Compile them with your kernel.
+
+Run, as root :
+modprobe marker-example (insmod order is not important)
+modprobe probe-example
+cat /proc/marker-example (returns an expected error)
+rmmod marker-example probe-example
+dmesg
diff --git a/Documentation/trace/tracepoints.txt b/Documentation/trace/tracepoints.txt
index c0e1ceed75a..d380250339a 100644
--- a/Documentation/trace/tracepoints.txt
+++ b/Documentation/trace/tracepoints.txt
@@ -106,7 +106,7 @@ used to export the defined tracepoints.
See the example provided in samples/tracepoints
Compile them with your kernel. They are built during 'make' (not
-'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m.
+'make modules') when CONFIG_SAMPLE=y and CONFIG_SAMPLE_TRACEPOINTS=m.
Run, as root :
modprobe tracepoint-sample (insmod order is not important)
diff --git a/arch/alpha/include/asm/thread_info.h b/arch/alpha/include/asm/thread_info.h
index 6f32f9c84a2..1ba67b14578 100644
--- a/arch/alpha/include/asm/thread_info.h
+++ b/arch/alpha/include/asm/thread_info.h
@@ -56,7 +56,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (2*PAGE_SIZE)
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags:
@@ -79,6 +79,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define TIF_UAC_SIGBUS 12
#define TIF_MEMDIE 13 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 14 /* restore signal mask in do_signal */
+#define TIF_KERNEL_TRACE 15 /* Kernel tracing of syscalls */
#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -87,6 +88,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_FREEZE (1<<TIF_FREEZE)
/* Work to do on interrupt/exception return. */
@@ -95,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
/* Work to do on any return to userspace. */
#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK \
- | _TIF_SYSCALL_TRACE)
+ | _TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE)
#define ALPHA_UAC_SHIFT 10
#define ALPHA_UAC_MASK (1 << TIF_UAC_NOPRINT | 1 << TIF_UAC_NOFIX | \
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a53245a9ef5..d8cdd7b00ac 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -13,6 +13,7 @@ config ARM
select HAVE_KPROBES if (!XIP_KERNEL && !THUMB2_KERNEL)
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
+ select HAVE_LTT_DUMP_TABLES
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug
index 494224a9b45..23f8c4b764e 100644
--- a/arch/arm/Kconfig.debug
+++ b/arch/arm/Kconfig.debug
@@ -147,4 +147,8 @@ config DEBUG_S3C_UART
The uncompressor code port configuration is now handled
by CONFIG_S3C_LOWLEVEL_UART_PORT.
+config DEBUG_TRACE_CLOCK
+ bool "Debug trace clock"
+ depends on HAVE_TRACE_CLOCK
+
endmenu
diff --git a/arch/arm/include/asm/a.out-core.h b/arch/arm/include/asm/a.out-core.h
index 93d04acaa31..92f10cb5c70 100644
--- a/arch/arm/include/asm/a.out-core.h
+++ b/arch/arm/include/asm/a.out-core.h
@@ -32,11 +32,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
dump->u_dsize = (tsk->mm->brk - tsk->mm->start_data + PAGE_SIZE - 1) >> PAGE_SHIFT;
dump->u_ssize = 0;
- dump->u_debugreg[0] = tsk->thread.debug.bp[0].address;
- dump->u_debugreg[1] = tsk->thread.debug.bp[1].address;
- dump->u_debugreg[2] = tsk->thread.debug.bp[0].insn.arm;
- dump->u_debugreg[3] = tsk->thread.debug.bp[1].insn.arm;
- dump->u_debugreg[4] = tsk->thread.debug.nsaved;
+ memset(dump->u_debugreg, 0, sizeof(dump->u_debugreg));
if (dump->start_stack < 0x04000000)
dump->u_ssize = (0x04000000 - dump->start_stack) >> PAGE_SHIFT;
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 9a87823642d..cf7dc925c63 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -395,7 +395,7 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
break;
case 2:
do {
- asm volatile("@ __cmpxchg1\n"
+ asm volatile("@ __cmpxchg2\n"
" ldrexh %1, [%2]\n"
" mov %0, #0\n"
" teq %1, %3\n"
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 7b5cc8dae06..1f925b8bcd5 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -129,6 +129,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
/*
* thread information flags:
* TIF_SYSCALL_TRACE - syscall trace active
+ * TIF_KERNEL_TRACE - kernel trace active
* TIF_SIGPENDING - signal pending
* TIF_NEED_RESCHED - rescheduling necessary
* TIF_NOTIFY_RESUME - callback before returning to user
@@ -138,6 +139,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
#define TIF_SIGPENDING 0
#define TIF_NEED_RESCHED 1
#define TIF_NOTIFY_RESUME 2 /* callback before returning to user */
+#define TIF_KERNEL_TRACE 7
#define TIF_SYSCALL_TRACE 8
#define TIF_POLLING_NRFLAG 16
#define TIF_USING_IWMMXT 17
@@ -149,6 +151,7 @@ extern void vfp_flush_hwstate(struct thread_info *);
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_USING_IWMMXT (1 << TIF_USING_IWMMXT)
diff --git a/arch/arm/include/asm/trace-clock.h b/arch/arm/include/asm/trace-clock.h
new file mode 100644
index 00000000000..8a13b7dedde
--- /dev/null
+++ b/arch/arm/include/asm/trace-clock.h
@@ -0,0 +1 @@
+#include <plat/trace-clock.h>
diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index c891eb76c0e..92684d2e905 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -397,6 +397,8 @@
#define __NR_fanotify_mark (__NR_SYSCALL_BASE+368)
#define __NR_prlimit64 (__NR_SYSCALL_BASE+369)
+#define __NR_syscall_max 370
+
/*
* The following SWIs are ARM private.
*/
diff --git a/arch/arm/include/asm/user.h b/arch/arm/include/asm/user.h
index 05ac4b06876..35917b3a97f 100644
--- a/arch/arm/include/asm/user.h
+++ b/arch/arm/include/asm/user.h
@@ -71,7 +71,7 @@ struct user{
/* the registers. */
unsigned long magic; /* To uniquely identify a core file */
char u_comm[32]; /* User command that was responsible */
- int u_debugreg[8];
+ int u_debugreg[8]; /* No longer used */
struct user_fp u_fp; /* FP state */
struct user_fp_struct * u_fp0;/* Used by gdb to help find the values for */
/* the FP registers. */
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 1e7b04a40a3..1edf1deadf8 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -43,6 +43,8 @@ ret_fast_syscall:
* Ok, we need to do extra processing, enter the slow path.
*/
fast_work_pending:
+ tst r1, #_TIF_KERNEL_TRACE @ flag can be set asynchronously
+ bne __sys_trace_return
str r0, [sp, #S_R0+S_OFF]! @ returned r0
work_pending:
tst r1, #_TIF_NEED_RESCHED
@@ -85,8 +87,8 @@ ENTRY(ret_from_fork)
get_thread_info tsk
ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing
mov why, #1
- tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
- beq ret_slow_syscall
+ tst r1, #_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE
+ beq ret_slow_syscall @ are we tracing syscalls?
mov r1, sp
mov r0, #1 @ trace exit [IP = 1]
bl syscall_trace
@@ -441,8 +443,8 @@ ENTRY(vector_swi)
1:
#endif
- tst r10, #_TIF_SYSCALL_TRACE @ are we tracing syscalls?
- bne __sys_trace
+ tst r10, #_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE
+ bne __sys_trace @ are we tracing syscalls?
cmp scno, #NR_syscalls @ check upper syscall limit
adr lr, BSYM(ret_fast_syscall) @ return address
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 94bbedbed63..fe2277c5d8c 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -30,6 +30,7 @@
#include <linux/uaccess.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
+#include <trace/sched.h>
#include <asm/cacheflush.h>
#include <asm/leds.h>
@@ -45,6 +46,8 @@ unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
+DEFINE_TRACE(sched_kthread_create);
+
static const char *processor_modes[] = {
"USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" ,
"UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26",
@@ -442,6 +445,7 @@ asm( ".pushsection .text\n"
pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
+ long pid;
memset(&regs, 0, sizeof(regs));
@@ -452,7 +456,10 @@ pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.ARM_pc = (unsigned long)kernel_thread_helper;
regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT;
- return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+ pid = do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+
+ trace_sched_kthread_create(fn, pid);
+ return pid;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2bf27f364d0..03438e9cc06 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -21,10 +21,15 @@
#include <linux/uaccess.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
+#include <trace/syscall.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/traps.h>
+#include <asm/unistd.h>
#define REG_PC 15
#define REG_PSR 16
@@ -52,6 +57,30 @@
#define BREAKINST_THUMB 0xde01
#endif
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
+extern unsigned long sys_call_table[];
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < __NR_syscall_max + 1; i++) {
+ sprint_symbol(namebuf, sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i, (void*)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+
+void ltt_dump_idt_table(void *call_data)
+{
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
struct pt_regs_offset {
const char *name;
int offset;
@@ -788,6 +817,11 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno)
{
unsigned long ip;
+ if (!why)
+ trace_syscall_entry(regs, scno);
+ else
+ trace_syscall_exit(regs->ARM_r0);
+
if (!test_thread_flag(TIF_SYSCALL_TRACE))
return scno;
if (!(current->ptrace & PT_PTRACED))
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 21ac43f1c2d..41eb77da882 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -23,6 +23,7 @@
#include <linux/kexec.h>
#include <linux/delay.h>
#include <linux/init.h>
+#include <trace/trap.h>
#include <linux/sched.h>
#include <asm/atomic.h>
@@ -35,6 +36,9 @@
#include "signal.h"
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" };
void *vectors_page;
@@ -296,7 +300,11 @@ void arm_notify_die(const char *str, struct pt_regs *regs,
current->thread.error_code = err;
current->thread.trap_no = trap;
+ trace_trap_entry(regs, current->thread.trap_no);
+
force_sig_info(info->si_signo, info, current);
+
+ trace_trap_exit();
} else {
die(str, regs, err);
}
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 898fffe0e9c..1d6f14a584f 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -180,6 +180,7 @@ obj-$(CONFIG_MACH_OMAP_3430SDP) += board-3430sdp.o \
hsmmc.o \
board-flash.o
obj-$(CONFIG_MACH_NOKIA_N8X0) += board-n8x0.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
obj-$(CONFIG_MACH_NOKIA_RM680) += board-rm680.o \
sdram-nokia.o \
hsmmc.o
diff --git a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
index b2b1e37bb6b..b10d9efd6db 100644
--- a/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
+++ b/arch/arm/mach-omap2/clkt34xx_dpll3m2.c
@@ -24,6 +24,7 @@
#include <plat/clock.h>
#include <plat/sram.h>
#include <plat/sdrc.h>
+#include <asm/trace-clock.h>
#include "clock.h"
#include "clock3xxx.h"
@@ -79,6 +80,8 @@ int omap3_core_dpll_m2_set_rate(struct clk *clk, unsigned long rate)
unlock_dll = 1;
}
+ cpu_hz = arm_fck_p->rate;
+
/*
* XXX This only needs to be done when the CPU frequency changes
*/
diff --git a/arch/arm/mach-omap2/clock34xx.c b/arch/arm/mach-omap2/clock34xx.c
index 287abc48092..8971015538a 100644
--- a/arch/arm/mach-omap2/clock34xx.c
+++ b/arch/arm/mach-omap2/clock34xx.c
@@ -18,6 +18,7 @@
#undef DEBUG
#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/clk.h>
#include <linux/io.h>
@@ -94,6 +95,9 @@ const struct clkops clkops_omap3430es2_dss_usbhost_wait = {
.find_companion = omap2_clk_dflt_find_companion,
};
+unsigned long long cpu_hz;
+EXPORT_SYMBOL(cpu_hz);
+
/**
* omap3430es2_clk_hsotgusb_find_idlest - return CM_IDLEST info for HSOTGUSB
* @clk: struct clk * being enabled
diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c
index 2f864e4b085..dcb1dd36c24 100644
--- a/arch/arm/mach-omap2/pm34xx.c
+++ b/arch/arm/mach-omap2/pm34xx.c
@@ -29,6 +29,7 @@
#include <linux/delay.h>
#include <linux/slab.h>
#include <linux/console.h>
+#include <trace/pm.h>
#include <plat/sram.h>
#include "clockdomain.h"
@@ -41,6 +42,8 @@
#include <asm/tlbflush.h>
+#include <asm/trace-clock.h>
+
#include "cm2xxx_3xxx.h"
#include "cm-regbits-34xx.h"
#include "prm-regbits-34xx.h"
@@ -80,6 +83,11 @@ struct power_state {
struct list_head node;
};
+DEFINE_TRACE(pm_idle_entry);
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_suspend_entry);
+DEFINE_TRACE(pm_suspend_exit);
+
static LIST_HEAD(pwrst_list);
static void (*_omap_sram_idle)(u32 *addr, int save_state);
@@ -519,8 +527,23 @@ static void omap3_pm_idle(void)
if (omap_irq_pending() || need_resched())
goto out;
+ trace_pm_idle_entry();
+ save_sync_trace_clock();
+
omap_sram_idle();
+ /*
+ * Resyncing the trace clock should ideally be done much sooner. When
+ * we arrive here, there are already some interrupt handlers which have
+ * run before us, using potentially wrong timestamps. This leads
+ * to problems when restarting the clock (and synchronizing on the 32k
+ * clock) if the cycle counter was still active.
+ * resync_track_clock must ensure that timestamps never ever go
+ * backward.
+ */
+ resync_trace_clock();
+ trace_pm_idle_exit();
+
out:
local_fiq_enable();
local_irq_enable();
@@ -550,7 +573,11 @@ static int omap3_pm_suspend(void)
omap_uart_prepare_suspend();
omap3_intc_suspend();
- omap_sram_idle();
+ trace_pm_suspend_entry();
+ save_sync_trace_clock();
+ omap_sram_idle();
+ resync_trace_clock();
+ trace_pm_suspend_exit();
restore:
/* Restore next_pwrsts */
diff --git a/arch/arm/mach-omap2/trace-clock.c b/arch/arm/mach-omap2/trace-clock.c
new file mode 100644
index 00000000000..3db1cdb8d59
--- /dev/null
+++ b/arch/arm/mach-omap2/trace-clock.c
@@ -0,0 +1,726 @@
+/*
+ * arch/arm/mach-omap2/trace-clock.c
+ *
+ * Trace clock for ARM OMAP3
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> 2009
+ */
+
+#include <linux/module.h>
+#include <linux/clocksource.h>
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/cpufreq.h>
+#include <linux/err.h>
+
+#include <plat/clock.h>
+#include <asm/trace-clock.h>
+#include <asm/pmu.h>
+
+/* depends on CONFIG_OMAP_32K_TIMER */
+/* Need direct access to the clock from arch/arm/mach-omap2/timer-gp.c */
+static struct clocksource *clock;
+
+DEFINE_PER_CPU(struct pm_save_count, pm_save_count);
+EXPORT_PER_CPU_SYMBOL_GPL(pm_save_count);
+
+static void clear_ccnt_ms(unsigned long data);
+
+/* According to timer32k.c, this is a 32768Hz clock, not a 32000Hz clock. */
+#define TIMER_32K_FREQ 32768
+#define TIMER_32K_SHIFT 15
+
+/*
+ * Clear ccnt twice per 31-bit overflow, or 4 times per 32-bit period.
+ */
+static u32 clear_ccnt_interval;
+
+static DEFINE_SPINLOCK(trace_clock_lock);
+static int trace_clock_refcount;
+
+static int print_info_done;
+
+static struct platform_device *reserved_pmu;
+
+static u32 get_mul_fact(u64 max_freq, u64 cur_freq)
+{
+ u64 rem;
+
+ BUG_ON(cur_freq == 0);
+ return __iter_div_u64_rem(max_freq << 10, cur_freq, &rem);
+}
+
+/*
+ * Cycle counter management.
+ */
+
+static inline void write_pmnc(u32 val)
+{
+ __asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 0" : : "r" (val));
+}
+
+static inline u32 read_pmnc(void)
+{
+ u32 val;
+ __asm__ __volatile__ ("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
+ return val;
+}
+
+static inline void write_ctens(u32 val)
+{
+ __asm__ __volatile__ ("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
+}
+
+static inline u32 read_ctens(void)
+{
+ u32 val;
+ __asm__ __volatile__ ("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
+ return val;
+}
+
+static inline void write_intenc(u32 val)
+{
+ __asm__ __volatile__ ("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
+}
+
+static inline u32 read_intenc(void)
+{
+ u32 val;
+ __asm__ __volatile__ ("mrc p15, 0, %0, c9, c14, 2" : "=r" (val));
+ return val;
+}
+
+static inline void write_useren(u32 val)
+{
+ __asm__ __volatile__ ("mcr p15, 0, %0, c9, c14, 0" : : "r" (val));
+}
+
+static inline u32 read_useren(void)
+{
+ u32 val;
+ __asm__ __volatile__ ("mrc p15, 0, %0, c9, c14, 0" : "=r" (val));
+ return val;
+}
+
+/*
+ * Must disable counter before writing to it.
+ */
+static inline void write_ccnt(u32 val)
+{
+ __asm__ __volatile__ ("mcr p15, 0, %0, c9, c13, 0" : : "r" (val));
+}
+
+/*
+ * Periodic timer handler: clears the ccnt most significant bit each
+ * half-period of the 31-bit overflow, making sure the ccnt never overflows.
+ */
+static void clear_ccnt_ms(unsigned long data)
+{
+ struct pm_save_count *pm_count;
+ unsigned int cycles;
+ unsigned long flags;
+ int cpu;
+
+ cpu = smp_processor_id();
+ pm_count = &per_cpu(pm_save_count, cpu);
+
+ local_irq_save(flags);
+
+ if (!pm_count->fast_clock_ready)
+ goto end;
+
+ isb(); /* clear the pipeline so we can execute ASAP */
+ write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */
+ cycles = read_ccnt();
+ write_ccnt(cycles & ~(1 << 31));
+ isb();
+ write_ctens(read_ctens() | (1 << 31)); /* enable counter */
+ isb();
+end:
+ local_irq_restore(flags);
+
+ mod_timer_pinned(&pm_count->clear_ccnt_ms_timer,
+ jiffies + clear_ccnt_interval);
+}
+
+/*
+ * Disable interrupts to protect against concurrent IPI save/resync.
+ */
+void save_sync_trace_clock(void)
+{
+ struct pm_save_count *pm_count;
+ unsigned long flags;
+ int cpu;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ pm_count = &per_cpu(pm_save_count, cpu);
+ raw_spin_lock(&pm_count->lock);
+
+ if (!pm_count->refcount)
+ goto end;
+
+ pm_count->ext_32k = clock->read(clock);
+ pm_count->int_fast_clock = trace_clock_read64();
+end:
+ raw_spin_unlock(&pm_count->lock);
+
+ /*
+ * Only enable slow read after saving the clock values.
+ */
+ barrier();
+ pm_count->fast_clock_ready = 0;
+
+ /*
+ * Disable counter to ensure there is no overflow while we are
+ * keeping track of time with ext. clock.
+ */
+ write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */
+ local_irq_restore(flags);
+}
+
+/*
+ * Called with preemption disabled. Read the external clock source directly
+ * and return the corresponding time in the fast clock source time frame.
+ * Called after time is saved and before it is resynced.
+ * Also used to periodically resync the drifting dvfs clock on external clock.
+ */
+u64 _trace_clock_read_slow(void)
+{
+ struct pm_save_count *pm_count;
+ u64 ref_time;
+ unsigned int count_32k;
+ int cpu;
+
+ cpu = smp_processor_id();
+ pm_count = &per_cpu(pm_save_count, cpu);
+ WARN_ON_ONCE(!pm_count->refcount);
+
+ /*
+ * Set the timer's value MSBs to the same as current 32K timer.
+ */
+ ref_time = pm_count->int_fast_clock;
+ if (!pm_count->init_clock)
+ count_32k = clock->read(clock);
+ else
+ count_32k = pm_count->init_clock;
+
+ /*
+ * Delta computed on 32 bits, then cast to u64. Must guarantee
+ * that we are called often enough so the difference does not
+ * overflow 32 bits anyway.
+ */
+ ref_time += (u64)(count_32k - pm_count->ext_32k)
+ * (cpu_hz >> TIMER_32K_SHIFT);
+ return ref_time;
+}
+EXPORT_SYMBOL_GPL(_trace_clock_read_slow);
+
+/*
+ * Resynchronize the per-cpu fast clock with the last save_sync values and the
+ * external clock. Called from PM (thread) context and IPI context.
+ */
+void resync_trace_clock(void)
+{
+ struct pm_save_count *pm_count;
+ struct tc_cur_freq *new_cf, *cf;
+ unsigned int new_index, index;
+ u64 ref_time;
+ unsigned long flags;
+ u32 regval;
+ int cpu;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ pm_count = &per_cpu(pm_save_count, cpu);
+ raw_spin_lock(&pm_count->lock);
+
+ if (!pm_count->refcount)
+ goto end;
+
+ /* Let userspace access performance counter registers */
+ regval = read_useren();
+ regval |= (1 << 0); /* User mode enable */
+ write_useren(regval);
+
+ regval = read_intenc();
+ regval |= (1 << 31); /* CCNT overflow interrupt disable */
+ write_intenc(regval);
+
+ regval = read_pmnc();
+ regval |= (1 << 0); /* Enable all counters */
+ regval &= ~(1 << 3); /* count every cycle */
+ regval &= ~(1 << 5); /* Enable even in non-invasive debug prohib. */
+ write_pmnc(regval);
+
+ ref_time = _trace_clock_read_slow();
+
+ if (pm_count->init_clock)
+ pm_count->init_clock = 0;
+
+ write_ctens(read_ctens() & ~(1 << 31)); /* disable counter */
+ write_ccnt((u32)ref_time & ~(1 << 31));
+ write_ctens(read_ctens() | (1 << 31)); /* enable counter */
+
+ _trace_clock_write_synthetic_tsc(ref_time);
+
+ index = pm_count->index;
+ new_index = 1 - index;
+ cf = &pm_count->cf[index];
+ new_cf = &pm_count->cf[new_index];
+ new_cf->hw_base = ref_time;
+ new_cf->virt_base = ref_time;
+ new_cf->cur_cpu_freq = cpufreq_quick_get(cpu);
+ if (new_cf->cur_cpu_freq == 0)
+ new_cf->cur_cpu_freq = pm_count->max_cpu_freq;
+ new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq,
+ new_cf->cur_cpu_freq);
+ new_cf->floor = max(ref_time, cf->floor);
+ barrier();
+ pm_count->index = new_index;
+ barrier(); /* make clock ready before enabling */
+ pm_count->fast_clock_ready = 1;
+
+ /* Delete resync timer if present; it has just done its job anyway. */
+ if (pm_count->dvfs_count)
+ del_timer(&pm_count->clock_resync_timer);
+ pm_count->dvfs_count = 0;
+
+ if (unlikely(!print_info_done)) {
+ printk(KERN_INFO "Trace clock using cycle counter at %llu HZ\n"
+ "saved 32k clk value 0x%08X, "
+ "saved cycle counter value 0x%016llX\n"
+ "synthetic value (write, read) 0x%016llX, 0x%016llX\n",
+ cpu_hz,
+ pm_count->ext_32k,
+ pm_count->int_fast_clock,
+ ref_time, trace_clock_read64());
+ printk(KERN_INFO "Reference clock used : %s\n", clock->name);
+ print_info_done = 1;
+ }
+end:
+ raw_spin_unlock(&pm_count->lock);
+ local_irq_restore(flags);
+}
+
+/*
+ * Called with IRQ and FIQ off.
+ */
+static void resync_on_32k(struct pm_save_count *pm_count, int cpu,
+ unsigned int cached_freq, int new_freq)
+{
+ struct tc_cur_freq *new_cf, *cf;
+ u64 ref_time;
+ unsigned int new_index, index;
+
+ index = pm_count->index;
+
+ new_index = 1 - index;
+ cf = &pm_count->cf[index];
+ new_cf = &pm_count->cf[new_index];
+ ref_time = _trace_clock_read_slow();
+ new_cf->hw_base = trace_clock_read_synthetic_tsc();
+ new_cf->virt_base = ref_time;
+ if (cached_freq)
+ new_cf->cur_cpu_freq = cf->cur_cpu_freq;
+ else {
+ new_cf->cur_cpu_freq = new_freq;
+ if (new_cf->cur_cpu_freq == 0)
+ new_cf->cur_cpu_freq = pm_count->max_cpu_freq;
+ }
+ new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq,
+ new_cf->cur_cpu_freq);
+ new_cf->floor = max((((new_cf->hw_base - cf->hw_base)
+ * cf->mul_fact) >> 10) + cf->virt_base,
+ cf->floor);
+ barrier();
+ pm_count->index = new_index;
+}
+
+/*
+ * Timer to resynchronize with ext. 32k clock after DVFS update (but not too
+ * often if flooded by DVFS updates).
+ * Necessary to deal with drift caused by DVFS updates.
+ * Per-cpu timer added by cpu freq events, single-shot.
+ */
+static void clock_resync_timer_fct(unsigned long data)
+{
+ struct pm_save_count *pm_count;
+ unsigned long flags;
+ int cpu;
+
+ cpu = smp_processor_id();
+ pm_count = &per_cpu(pm_save_count, cpu);
+
+ local_irq_save(flags);
+ local_fiq_disable(); /* disable fiqs for floor value */
+
+ /* Need to resync if we had more than 1 dvfs event in period */
+ if (pm_count->dvfs_count > 1)
+ resync_on_32k(pm_count, cpu, 1, 0);
+ pm_count->dvfs_count = 0;
+
+ local_fiq_enable();
+ local_irq_restore(flags);
+}
+
+static void prepare_timer(int cpu)
+{
+ struct pm_save_count *pm_count;
+
+ pm_count = &per_cpu(pm_save_count, cpu);
+ init_timer_deferrable(&pm_count->clear_ccnt_ms_timer);
+ pm_count->clear_ccnt_ms_timer.function = clear_ccnt_ms;
+ pm_count->clear_ccnt_ms_timer.expires = jiffies + clear_ccnt_interval;
+
+ init_timer_deferrable(&pm_count->clock_resync_timer);
+ pm_count->clock_resync_timer.function = clock_resync_timer_fct;
+}
+
+static void enable_timer(int cpu)
+{
+ struct pm_save_count *pm_count;
+
+ pm_count = &per_cpu(pm_save_count, cpu);
+ add_timer_on(&pm_count->clear_ccnt_ms_timer, cpu);
+}
+
+static void disable_timer_ipi(void *info)
+{
+ save_sync_trace_clock();
+}
+
+static void disable_timer(int cpu)
+{
+ struct pm_save_count *pm_count;
+
+ pm_count = &per_cpu(pm_save_count, cpu);
+ del_timer_sync(&pm_count->clear_ccnt_ms_timer);
+ if (pm_count->dvfs_count)
+ del_timer_sync(&pm_count->clock_resync_timer);
+ smp_call_function_single(cpu, disable_timer_ipi, NULL, 1);
+}
+
+static void resync_ipi(void *info)
+{
+ resync_trace_clock();
+}
+
+void _start_trace_clock(void)
+{
+ struct pm_save_count *pm_count;
+ u32 ext_32k;
+ u64 old_fast_clock;
+ int cpu;
+
+ ext_32k = clock->read(clock);
+ old_fast_clock = per_cpu(pm_save_count, 0).int_fast_clock;
+
+ for_each_online_cpu(cpu) {
+ pm_count = &per_cpu(pm_save_count, cpu);
+ pm_count->ext_32k = ext_32k;
+ pm_count->int_fast_clock = old_fast_clock;
+ pm_count->refcount = 1;
+ pm_count->init_clock = ext_32k;
+ pm_count->dvfs_count = 0;
+ }
+
+ on_each_cpu(resync_ipi, NULL, 1);
+
+ get_synthetic_tsc();
+
+ for_each_online_cpu(cpu) {
+ prepare_timer(cpu);
+ enable_timer(cpu);
+ }
+}
+
+void _stop_trace_clock(void)
+{
+ struct pm_save_count *pm_count;
+ int cpu;
+
+ per_cpu(pm_save_count, 0).int_fast_clock = trace_clock_read64();
+
+ for_each_online_cpu(cpu) {
+ pm_count = &per_cpu(pm_save_count, cpu);
+ disable_timer(cpu);
+ pm_count->refcount = 0;
+ }
+ put_synthetic_tsc();
+}
+
+void start_trace_clock(void)
+{
+ spin_lock(&trace_clock_lock);
+ if (!trace_clock_refcount)
+ goto end;
+ _start_trace_clock();
+end:
+ spin_unlock(&trace_clock_lock);
+}
+
+void stop_trace_clock(void)
+{
+ spin_lock(&trace_clock_lock);
+ if (!trace_clock_refcount)
+ goto end;
+ _stop_trace_clock();
+end:
+ spin_unlock(&trace_clock_lock);
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Start/stop timers for trace clock upon cpu hotplug.
+ * Also resync the clock.
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ struct pm_save_count *pm_count;
+ unsigned int hotcpu = (unsigned long)hcpu;
+ unsigned long flags;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ spin_lock(&trace_clock_lock);
+ if (trace_clock_refcount) {
+ pm_count = &per_cpu(pm_save_count, hotcpu);
+ local_irq_save(flags);
+ pm_count->ext_32k = clock->read(clock);
+ pm_count->int_fast_clock = trace_clock_read64();
+ local_irq_restore(flags);
+ pm_count->refcount = 1;
+ pm_count->dvfs_count = 0;
+ prepare_timer(hotcpu);
+ }
+ spin_unlock(&trace_clock_lock);
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ spin_lock(&trace_clock_lock);
+ if (trace_clock_refcount) {
+ resync_trace_clock();
+ enable_timer(hotcpu);
+ }
+ spin_unlock(&trace_clock_lock);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ spin_lock(&trace_clock_lock);
+ if (trace_clock_refcount)
+ disable_timer(hotcpu);
+ spin_unlock(&trace_clock_lock);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ spin_lock(&trace_clock_lock);
+ if (trace_clock_refcount) {
+ pm_count = &per_cpu(pm_save_count, hotcpu);
+ pm_count->refcount = 0;
+ }
+ spin_unlock(&trace_clock_lock);
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+ int ret = 0;
+
+ spin_lock(&trace_clock_lock);
+ if (trace_clock_refcount)
+ goto end;
+ reserved_pmu = reserve_pmu(ARM_PMU_DEVICE_CPU);
+ if (IS_ERR_OR_NULL(reserved_pmu) && PTR_ERR(reserved_pmu) != -ENODEV) {
+ ret = -EBUSY;
+ goto end;
+ }
+ trace_clock_refcount++;
+ _start_trace_clock();
+end:
+ spin_unlock(&trace_clock_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+ spin_lock(&trace_clock_lock);
+ WARN_ON(trace_clock_refcount <= 0);
+ if (trace_clock_refcount != 1)
+ goto end;
+ _stop_trace_clock();
+ release_pmu(reserved_pmu);
+end:
+ trace_clock_refcount--;
+ spin_unlock(&trace_clock_lock);
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+/*
+ * We do not use the prechange hook to sample two clock values and average
+ * them, because locking w.r.t. other timers can be difficult to get right.
+ * A bit more imprecision just increases the drift. We have a periodic timer
+ * in place to resynchronize periodically on the 32k clock anyway.
+ */
+static int cpufreq_trace_clock(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct cpufreq_freqs *freq = data;
+ struct pm_save_count *pm_count;
+ struct tc_cur_freq *new_cf, *cf;
+ unsigned long flags;
+ unsigned int new_index, index;
+ u64 post_val;
+ int cpu;
+
+#if 0 /* debug trace_mark */
+ trace_mark(test, freq_change,
+ "%s cpu %u oldfreq %u newfreq %u const %u",
+ (val != CPUFREQ_POSTCHANGE) ? "prechange" : "postchange",
+ freq->cpu, freq->old, freq->new,
+ (freq->flags & CPUFREQ_CONST_LOOPS) ? 1 : 0);
+#endif
+
+ if (freq->flags & CPUFREQ_CONST_LOOPS)
+ return 0;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ local_irq_save(flags);
+ cpu = smp_processor_id();
+ WARN_ON_ONCE(cpu != freq->cpu);
+ pm_count = &per_cpu(pm_save_count, cpu);
+ raw_spin_lock(&pm_count->lock);
+
+ if (!pm_count->refcount)
+ goto end;
+
+ /*
+ * Disable FIQs to ensure the floor value is indeed the
+ * floor.
+ */
+ local_fiq_disable();
+
+ if (!pm_count->dvfs_count) {
+ resync_on_32k(pm_count, cpu, 0, freq->new);
+ pm_count->clock_resync_timer.expires = jiffies
+ + (TC_RESYNC_PERIOD * HZ / 1000);
+ add_timer_on(&pm_count->clock_resync_timer, cpu);
+ } else {
+ post_val = trace_clock_read_synthetic_tsc();
+ /* disable irqs to ensure we are the only value modifier */
+ index = pm_count->index;
+ new_index = 1 - index;
+ cf = &pm_count->cf[index];
+ new_cf = &pm_count->cf[new_index];
+ new_cf->hw_base = post_val;
+ new_cf->virt_base = (((post_val - cf->hw_base)
+ * cf->mul_fact) >> 10) + cf->virt_base;
+ new_cf->cur_cpu_freq = freq->new;
+ new_cf->mul_fact = get_mul_fact(pm_count->max_cpu_freq,
+ freq->new);
+ new_cf->floor = max((((post_val - cf->hw_base)
+ * cf->mul_fact) >> 10) + cf->virt_base,
+ cf->floor);
+ barrier();
+ pm_count->index = new_index;
+ }
+
+ local_fiq_enable();
+ pm_count->dvfs_count++;
+end:
+ raw_spin_unlock(&pm_count->lock);
+ local_irq_restore(flags);
+ return 0;
+}
+
+static struct notifier_block cpufreq_trace_clock_nb = {
+ .notifier_call = cpufreq_trace_clock,
+};
+
+#ifdef CONFIG_DEBUG_TRACE_CLOCK
+/*
+ * Clock expected to never overflow and never go backward.
+ */
+static DEFINE_PER_CPU(u64, last_clock_value);
+static DEFINE_PER_CPU(u32, last_ccnt_value);
+DEFINE_PER_CPU(unsigned int, last_clock_nest);
+EXPORT_PER_CPU_SYMBOL_GPL(last_clock_nest);
+
+static int tc_print_done;
+
+/*
+ * Called with interrupts disabled.
+ */
+void trace_clock_debug(u64 value)
+{
+ int cpu;
+
+ cpu = smp_processor_id();
+ if (unlikely(per_cpu(last_clock_nest, cpu) != 1))
+ return; /* fiq nesting, don't perform racy check */
+ if (unlikely(!tc_print_done
+ && (per_cpu(last_clock_value, cpu) > value))) {
+ printk(KERN_WARNING "Trace clock going back last %llu new %llu "
+ "diff %llu last_ccnt %u ccnt %u\n",
+ (unsigned long long) per_cpu(last_clock_value, cpu),
+ (unsigned long long) value,
+ (unsigned long long) per_cpu(last_clock_value, cpu)
+ - value,
+ per_cpu(last_ccnt_value, cpu),
+ trace_clock_read32());
+ tc_print_done = 1;
+ }
+ per_cpu(last_clock_value, cpu) = value;
+ per_cpu(last_ccnt_value, cpu) = trace_clock_read32();
+}
+EXPORT_SYMBOL_GPL(trace_clock_debug);
+#endif
+
+static __init int init_trace_clock(void)
+{
+ int cpu, ret;
+ u64 rem;
+
+ ret = init_pmu(ARM_PMU_DEVICE_CPU);
+ if (ret && ret != -ENODEV)
+ return ret;
+ clock = get_clocksource_32k();
+ /*
+ * clear_ccnt_interval is based on the fastest cpu frequency. It is never
+ * recomputed.
+ */
+ clear_ccnt_interval = __iter_div_u64_rem(HZ * (1ULL << 30), cpu_hz,
+ &rem);
+ printk(KERN_INFO "LTTng will clear ccnt top bit every %u jiffies.\n",
+ clear_ccnt_interval);
+ for_each_possible_cpu(cpu) {
+ per_cpu(pm_save_count, cpu).max_cpu_freq =
+ __iter_div_u64_rem(cpu_hz, 1000, &rem);
+ per_cpu(pm_save_count, cpu).lock =
+ __RAW_SPIN_LOCK_UNLOCKED(per_cpu(pm_save_count,
+ cpu).lock);
+ }
+ hotcpu_notifier(hotcpu_callback, 4);
+ cpufreq_register_notifier(&cpufreq_trace_clock_nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ return 0;
+}
+__initcall(init_trace_clock);
diff --git a/arch/arm/plat-omap/Kconfig b/arch/arm/plat-omap/Kconfig
index b6333ae3f92..99593d71a85 100644
--- a/arch/arm/plat-omap/Kconfig
+++ b/arch/arm/plat-omap/Kconfig
@@ -17,6 +17,10 @@ config ARCH_OMAP1
config ARCH_OMAP2PLUS
bool "TI OMAP2/3/4"
+ select COMMON_CLKDEV
+ select HAVE_TRACE_CLOCK
+ select HAVE_TRACE_CLOCK_32_TO_64
+ select OMAP_32K_TIMER
select CLKDEV_LOOKUP
select OMAP_DM_TIMER
help
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 862dda95d61..8627a516688 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -107,6 +107,11 @@ static struct clocksource clocksource_32k = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
+struct clocksource *get_clocksource_32k(void)
+{
+ return &clocksource_32k;
+}
+
/*
* Returns current time from boot in nsecs. It's OK for this to wrap
* around for now, as it's just a relative time stamp.
diff --git a/arch/arm/plat-omap/include/plat/clock.h b/arch/arm/plat-omap/include/plat/clock.h
index 8eb0adab19e..2ad01f37a3c 100644
--- a/arch/arm/plat-omap/include/plat/clock.h
+++ b/arch/arm/plat-omap/include/plat/clock.h
@@ -297,4 +297,6 @@ extern const struct clkops clkops_null;
extern struct clk dummy_ck;
+struct clocksource *get_clocksource_32k(void);
+
#endif
diff --git a/arch/arm/plat-omap/include/plat/trace-clock.h b/arch/arm/plat-omap/include/plat/trace-clock.h
new file mode 100644
index 00000000000..7fcdbf98063
--- /dev/null
+++ b/arch/arm/plat-omap/include/plat/trace-clock.h
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2009 Mathieu Desnoyers
+ *
+ * Trace clock ARM OMAP3 definitions.
+ */
+
+#ifndef _ASM_ARM_TRACE_CLOCK_OMAP3_H
+#define _ASM_ARM_TRACE_CLOCK_OMAP3_H
+
+#include <linux/clk.h>
+#include <linux/timer.h>
+#include <linux/percpu.h>
+#include <plat/clock.h>
+
+/*
+ * Number of hardware clock bits. The higher order bits are expected to be 0.
+ * If the hardware clock source has more than 32 bits, the bits higher than the
+ * 32nd will be truncated by a cast to a 32-bit unsigned. Range : 1 - 32.
+ * (too few bits would be unrealistic though, since we depend on the timer to
+ * detect the overflows).
+ * OMAP3-specific : we clear bit 31 periodically so it never overflows. There
+ * is a hardware bug triggered when CP14/CP15 accesses execute at the same
+ * time a ccnt overflow occurs.
+ *
+ * Siarhei Siamashka <siarhei.siamashka@nokia.com> :
+ * Performance monitoring unit breaks if somebody is accessing CP14/CP15
+ * coprocessor register exactly at the same time as CCNT overflows (regardless
+ * of the fact if generation of interrupts is enabled or not). A workaround
+ * suggested by ARM was to never allow it to overflow and reset it
+ * periodically.
+ */
+#define TC_HW_BITS 31
+
+/* Expected maximum interrupt latency in ms : 15ms, *2 as a safety margin */
+#define TC_EXPECTED_INTERRUPT_LATENCY 30
+
+/* Resync with 32k clock each 100ms */
+#define TC_RESYNC_PERIOD 100
+
+struct tc_cur_freq {
+ u64 cur_cpu_freq; /* in khz */
+ /* cur time : (now - base) * (max_freq / cur_freq) + base */
+ u32 mul_fact; /* (max_cpu_freq << 10) / cur_freq */
+ u64 hw_base; /* stamp of last cpufreq change, hw cycles */
+ u64 virt_base; /* same as above, virtual trace clock cycles */
+ u64 floor; /* floor value, so time never goes back */
+};
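+
+/*
+ * Worked example with illustrative (assumed) values: if max_cpu_freq is
+ * 600000 khz and cur_cpu_freq is 300000 khz, then
+ * mul_fact = (600000 << 10) / 300000 = 2048, so
+ * ((now - hw_base) * mul_fact) >> 10 doubles the elapsed hardware cycles:
+ * the virtual clock keeps advancing at the maximum-frequency rate while
+ * the CPU runs at half speed.
+ */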
+
+/* Per-cpu 32KHz counter save state upon PM sleep, plus cpufreq management */
+struct pm_save_count {
+ struct tc_cur_freq cf[2]; /* rcu-protected */
+ unsigned int index; /* tc_cur_freq current read index */
+ /*
+ * Is fast clock ready to be read? Read with preemption off. Modified
+ * only by local CPU in thread and interrupt context or by start/stop
+ * when time is not read concurrently.
+ */
+ int fast_clock_ready;
+
+ u64 int_fast_clock;
+ struct timer_list clear_ccnt_ms_timer;
+ struct timer_list clock_resync_timer;
+ u32 ext_32k;
+ int refcount;
+ u32 init_clock;
+ raw_spinlock_t lock; /* spinlock that only syncs the refcount */
+ unsigned int dvfs_count; /* Number of DVFS updates in period */
+ /* cpufreq management */
+ u64 max_cpu_freq; /* in khz */
+};
+
+DECLARE_PER_CPU(struct pm_save_count, pm_save_count);
+
+extern u64 trace_clock_read_synthetic_tsc(void);
+extern void _trace_clock_write_synthetic_tsc(u64 value);
+extern unsigned long long cpu_hz;
+
+DECLARE_PER_CPU(int, fast_clock_ready);
+extern u64 _trace_clock_read_slow(void);
+
+/*
+ * ARM OMAP3 timers only return 32-bit values. We need to extend them to a
+ * 64-bit value, which is provided by trace-clock-32-to-64.
+ */
+extern u64 trace_clock_async_tsc_read(void);
+/*
+ * Update done by the architecture upon wakeup.
+ */
+extern void _trace_clock_write_synthetic_tsc(u64 value);
+
+#ifdef CONFIG_DEBUG_TRACE_CLOCK
+DECLARE_PER_CPU(unsigned int, last_clock_nest);
+extern void trace_clock_debug(u64 value);
+#else
+static inline void trace_clock_debug(u64 value)
+{
+}
+#endif
+
+static inline u32 read_ccnt(void)
+{
+ u32 val;
+ __asm__ __volatile__ ("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
+ return val & ~(1 << TC_HW_BITS);
+}
+
+static inline u32 trace_clock_read32(void)
+{
+ u32 val;
+
+ isb();
+ val = read_ccnt();
+ isb();
+ return val;
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ struct pm_save_count *pm_count;
+ struct tc_cur_freq *cf;
+ u64 val;
+#ifdef CONFIG_DEBUG_TRACE_CLOCK
+ unsigned long flags;
+
+ local_irq_save(flags);
+ per_cpu(last_clock_nest, smp_processor_id())++;
+ barrier();
+#endif
+
+ preempt_disable();
+ pm_count = &per_cpu(pm_save_count, smp_processor_id());
+ if (likely(pm_count->fast_clock_ready)) {
+ cf = &pm_count->cf[ACCESS_ONCE(pm_count->index)];
+ val = max((((trace_clock_read_synthetic_tsc() - cf->hw_base)
+ * cf->mul_fact) >> 10) + cf->virt_base, cf->floor);
+ } else
+ val = _trace_clock_read_slow();
+ trace_clock_debug(val);
+ preempt_enable();
+
+#ifdef CONFIG_DEBUG_TRACE_CLOCK
+ barrier();
+ per_cpu(last_clock_nest, smp_processor_id())--;
+ local_irq_restore(flags);
+#endif
+ return val;
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return cpu_hz;
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+extern int get_trace_clock(void);
+extern void put_trace_clock(void);
+extern void get_synthetic_tsc(void);
+extern void put_synthetic_tsc(void);
+
+extern void resync_trace_clock(void);
+extern void save_sync_trace_clock(void);
+extern void start_trace_clock(void);
+extern void stop_trace_clock(void);
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_ARM_TRACE_CLOCK_OMAP3_H */
diff --git a/arch/avr32/include/asm/thread_info.h b/arch/avr32/include/asm/thread_info.h
index 7a9c03dcb0b..6e882a9584e 100644
--- a/arch/avr32/include/asm/thread_info.h
+++ b/arch/avr32/include/asm/thread_info.h
@@ -66,7 +66,7 @@ static inline struct thread_info *current_thread_info(void)
#endif /* !__ASSEMBLY__ */
-#define PREEMPT_ACTIVE 0x40000000
+#define PREEMPT_ACTIVE 0x10000000
/*
* Thread information flags
@@ -85,6 +85,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 7 /* restore signal mask in do_signal */
#define TIF_CPU_GOING_TO_SLEEP 8 /* CPU is entering sleep 0 mode */
#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */
+#define TIF_KERNEL_TRACE 10 /* kernel trace active */
#define TIF_FREEZE 29
#define TIF_DEBUG 30 /* debugging enabled */
#define TIF_USERSPACE 31 /* true if FS sets userspace */
@@ -93,28 +94,32 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#define _TIF_BREAKPOINT (1 << TIF_BREAKPOINT)
#define _TIF_SINGLE_STEP (1 << TIF_SINGLE_STEP)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_CPU_GOING_TO_SLEEP (1 << TIF_CPU_GOING_TO_SLEEP)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_FREEZE (1 << TIF_FREEZE)
+#define _TIF_DEBUG (1 << TIF_DEBUG)
+#define _TIF_USERSPACE (1 << TIF_USERSPACE)
/* Note: The masks below must never span more than 16 bits! */
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
- ((1 << TIF_SIGPENDING) \
+ (_TIF_SIGPENDING \
| _TIF_NOTIFY_RESUME \
- | (1 << TIF_NEED_RESCHED) \
- | (1 << TIF_POLLING_NRFLAG) \
- | (1 << TIF_BREAKPOINT) \
- | (1 << TIF_RESTORE_SIGMASK))
+ | _TIF_NEED_RESCHED \
+ | _TIF_POLLING_NRFLAG \
+ | _TIF_BREAKPOINT \
+ | _TIF_RESTORE_SIGMASK)
/* work to do on any return to userspace */
-#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | (1 << TIF_SYSCALL_TRACE) | \
- _TIF_NOTIFY_RESUME)
+#define _TIF_ALLWORK_MASK (_TIF_WORK_MASK | _TIF_SYSCALL_TRACE | \
+ _TIF_NOTIFY_RESUME | _TIF_KERNEL_TRACE)
/* work to do on return from debug mode */
-#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~(1 << TIF_BREAKPOINT))
+#define _TIF_DBGWORK_MASK (_TIF_WORK_MASK & ~_TIF_BREAKPOINT)
#endif /* __ASM_AVR32_THREAD_INFO_H */
diff --git a/arch/blackfin/include/asm/thread_info.h b/arch/blackfin/include/asm/thread_info.h
index 02560fd8a12..510e54bfd0a 100644
--- a/arch/blackfin/include/asm/thread_info.h
+++ b/arch/blackfin/include/asm/thread_info.h
@@ -102,8 +102,9 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
#define TIF_FREEZE 6 /* is freezing for suspend */
#define TIF_IRQ_SYNC 7 /* sync pipeline stage */
-#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
-#define TIF_SINGLESTEP 9
+#define TIF_KERNEL_TRACE 8 /* kernel trace active */
+#define TIF_NOTIFY_RESUME 9 /* callback before returning to user */
+#define TIF_SINGLESTEP 10
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -115,8 +116,9 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_IRQ_SYNC (1<<TIF_IRQ_SYNC)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK 0x0000FEFE /* work to do on interrupt/exception return */
#endif /* __KERNEL__ */
diff --git a/arch/cris/include/asm/thread_info.h b/arch/cris/include/asm/thread_info.h
index 91776069ca8..bc2024dbe32 100644
--- a/arch/cris/include/asm/thread_info.h
+++ b/arch/cris/include/asm/thread_info.h
@@ -83,6 +83,7 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 1 /* resumption notification requested */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
+#define TIF_KERNEL_TRACE 4 /* kernel trace active */
#define TIF_RESTORE_SIGMASK 9 /* restore signal mask in do_signal() */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -92,12 +93,16 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_FREEZE (1<<TIF_FREEZE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
-#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK (0x0000FFFF & \
+	~(_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE))
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK 0x0000FFFF
#endif /* __KERNEL__ */
diff --git a/arch/frv/include/asm/thread_info.h b/arch/frv/include/asm/thread_info.h
index 11f33ead29b..8adf256d213 100644
--- a/arch/frv/include/asm/thread_info.h
+++ b/arch/frv/include/asm/thread_info.h
@@ -112,6 +112,7 @@ register struct thread_info *__current_thread_info asm("gr15");
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
+#define TIF_KERNEL_TRACE 6 /* kernel trace active */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_FREEZE 18 /* freezing for suspend */
@@ -122,10 +123,11 @@ register struct thread_info *__current_thread_info asm("gr15");
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_FREEZE (1 << TIF_FREEZE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK 0x0000FFBE /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
/*
diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h
index d6f1784bfde..65685fa4554 100644
--- a/arch/h8300/include/asm/thread_info.h
+++ b/arch/h8300/include/asm/thread_info.h
@@ -90,18 +90,20 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_MEMDIE 4 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 5 /* restore signal mask in do_signal() */
#define TIF_NOTIFY_RESUME 6 /* callback before returning to user */
+#define TIF_KERNEL_TRACE 7 /* kernel trace active */
#define TIF_FREEZE 16 /* is freezing for suspend */
/* as above, but as bit values */
-#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
-#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
-#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
-#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
-#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_FREEZE (1<<TIF_FREEZE)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
+#define _TIF_FREEZE (1 << TIF_FREEZE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK 0x0000FF7E /* work to do on interrupt/exception return */
#endif /* __KERNEL__ */
diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index b6a5ba2aca3..3206bb5575b 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -100,6 +100,7 @@ struct thread_info {
#define TIF_SYSCALL_TRACE 2 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */
#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */
+#define TIF_KERNEL_TRACE 5 /* kernel trace active */
#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
@@ -111,7 +112,9 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
-#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
+#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|\
+ _TIF_SINGLESTEP|_TIF_KERNEL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
@@ -124,8 +127,9 @@ struct thread_info {
/* "work to do on user-return" bits */
#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\
_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE)
-/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
-#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
+/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE, TIF_KERNEL_TRACE or TIF_SYSCALL_AUDIT */
+#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE|\
+ _TIF_SYSCALL_AUDIT))
#define TS_POLLING 1 /* true if in idle loop and not sleeping */
#define TS_RESTORE_SIGMASK 2 /* restore signal mask in do_signal() */
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 244704a174d..56c4de30197 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -620,9 +620,11 @@ GLOBAL_ENTRY(ia64_ret_from_clone)
;;
ld4 r2=[r2]
;;
+ movl r8=_TIF_SYSCALL_TRACEAUDIT
+ ;; // added stop bits to prevent r8 dependency
+ and r2=r8,r2
mov r8=0
- and r2=_TIF_SYSCALL_TRACEAUDIT,r2
- ;;
+ ;; // added stop bits to prevent r2 dependency
cmp.ne p6,p0=r2,r0
(p6) br.cond.spnt .strace_check_retval
;; // added stop bits to prevent r8 dependency
diff --git a/arch/m32r/include/asm/thread_info.h b/arch/m32r/include/asm/thread_info.h
index 71faff5bcc2..8538b1a0eaf 100644
--- a/arch/m32r/include/asm/thread_info.h
+++ b/arch/m32r/include/asm/thread_info.h
@@ -139,6 +139,7 @@ static inline unsigned int get_thread_fault_code(void)
#define TIF_SINGLESTEP 3 /* restore singlestep on return to user mode */
#define TIF_IRET 4 /* return with iret */
#define TIF_NOTIFY_RESUME 5 /* callback before returning to user */
+#define TIF_KERNEL_TRACE 6 /* kernel trace active */
#define TIF_RESTORE_SIGMASK 8 /* restore signal mask in do_signal() */
#define TIF_USEDFPU 16 /* FPU was used by this task this quantum (SMP) */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
@@ -150,14 +151,19 @@ static inline unsigned int get_thread_fault_code(void)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_USEDFPU (1<<TIF_USEDFPU)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_FREEZE (1<<TIF_FREEZE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
-#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
+/* work to do on any return to u-space */
+#define _TIF_ALLWORK_MASK 0x0000FFFF
+
+/* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK \
+ (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE))
/*
* Thread-synchronous status.
diff --git a/arch/m68k/include/asm/thread_info.h b/arch/m68k/include/asm/thread_info.h
index 790988967ba..fa8256a17eb 100644
--- a/arch/m68k/include/asm/thread_info.h
+++ b/arch/m68k/include/asm/thread_info.h
@@ -100,6 +100,7 @@ static inline struct thread_info *current_thread_info(void)
*/
#define TIF_SIGPENDING 6 /* signal pending */
#define TIF_NEED_RESCHED 7 /* rescheduling necessary */
+#define TIF_KERNEL_TRACE 13 /* kernel trace active */
#define TIF_DELAYED_TRACE 14 /* single step a syscall */
#define TIF_SYSCALL_TRACE 15 /* syscall trace active */
#define TIF_MEMDIE 16 /* is terminating due to OOM killer */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index f5ecc0566bc..e0c246d26e1 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -17,6 +17,7 @@ config MIPS
select HAVE_KRETPROBES
select RTC_LIB if !MACH_LOONGSON
select GENERIC_ATOMIC64 if !64BIT
+ select HAVE_LTT_DUMP_TABLES
select HAVE_DMA_ATTRS
select HAVE_DMA_API_DEBUG
select HAVE_GENERIC_HARDIRQS
@@ -1963,6 +1964,20 @@ config CPU_R4000_WORKAROUNDS
config CPU_R4400_WORKAROUNDS
bool
+config HAVE_GET_CYCLES_32
+ def_bool y
+ depends on !CPU_R4400_WORKAROUNDS
+ depends on !CPU_CAVIUM_OCTEON
+ select HAVE_TRACE_CLOCK
+ select HAVE_TRACE_CLOCK_32_TO_64
+ select HAVE_UNSYNCHRONIZED_TSC
+
+config HAVE_GET_CYCLES
+ def_bool y
+ depends on CPU_CAVIUM_OCTEON
+ select HAVE_TRACE_CLOCK
+ select HAVE_UNSYNCHRONIZED_TSC
+
#
# - Highmem only makes sense for the 32-bit kernel.
# - The current highmem code will only work properly on physically indexed
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index c0884f02d3a..1419b787e1e 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -178,4 +178,10 @@
#define nudge_writes() mb()
#endif
+/*
+ * MIPS does not have any instruction to serialize instruction execution on the
+ * core.
+ */
+#define sync_core()
+
#endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index 4d987097538..44f631b39ff 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -438,6 +438,7 @@
*/
#define CAUSEB_EXCCODE 2
#define CAUSEF_EXCCODE (_ULCAST_(31) << 2)
+#define CAUSE_EXCCODE(cause) (((cause) & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE)
#define CAUSEB_IP 8
#define CAUSEF_IP (_ULCAST_(255) << 8)
#define CAUSEB_IP0 8
diff --git a/arch/mips/include/asm/octeon/trace-clock.h b/arch/mips/include/asm/octeon/trace-clock.h
new file mode 100644
index 00000000000..062662b732a
--- /dev/null
+++ b/arch/mips/include/asm/octeon/trace-clock.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock MIPS Octeon definitions.
+ */
+
+#ifndef _ASM_MIPS_OCTEON_TRACE_CLOCK_H
+#define _ASM_MIPS_OCTEON_TRACE_CLOCK_H
+
+#include <asm/octeon/octeon.h>
+
+#define TC_HW_BITS 64
+
+static inline u32 trace_clock_read32(void)
+{
+ return (u32)read_c0_cvmcount(); /* only need the 32 LSB */
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return read_c0_cvmcount();
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return octeon_get_clock_rate();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+static inline int get_trace_clock(void)
+{
+ return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+ return;
+}
+#endif /* _ASM_MIPS_OCTEON_TRACE_CLOCK_H */
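Since the Octeon trace clock is just the 64-bit cvmcount register ticking at the core clock, converting a cycle delta to nanoseconds only needs trace_clock_frequency(). A hypothetical helper (not part of the patch), split into whole seconds and a remainder so the 64-bit multiply cannot overflow at realistic clock rates:

        /* Hypothetical: convert a trace clock delta to nanoseconds. */
        static inline u64 trace_cycles_to_ns(u64 delta)
        {
                u64 freq = trace_clock_frequency();
                u64 secs = delta / freq;
                u64 rem = delta % freq;

                /* rem < freq, so rem * NSEC_PER_SEC fits in 64 bits for
                 * core clocks below roughly 18 GHz. */
                return secs * NSEC_PER_SEC + rem * NSEC_PER_SEC / freq;
        }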
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index d309556cacf..eb7f7b99038 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -122,6 +122,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
#define TIF_32BIT_ADDR 23 /* 32-bit address space (o32/n32) */
#define TIF_FPUBOUND 24 /* thread bound to FPU-full CPU set */
#define TIF_LOAD_WATCH 25 /* If set, load watch registers */
+#define TIF_KERNEL_TRACE 30 /* kernel trace active */
#define TIF_SYSCALL_TRACE 31 /* syscall trace active */
#ifdef CONFIG_MIPS32_O32
@@ -131,6 +132,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
#endif /* CONFIG_MIPS32_O32 */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
@@ -151,7 +153,7 @@ register struct thread_info *__current_thread_info __asm__("$28");
#define _TIF_WORK_MASK (0x0000ffef & \
~(_TIF_SECCOMP | _TIF_SYSCALL_AUDIT))
/* work to do on any return to u-space */
-#define _TIF_ALLWORK_MASK (0x8000ffff & ~_TIF_SECCOMP)
+#define _TIF_ALLWORK_MASK (0xc000ffff & ~_TIF_SECCOMP)
#endif /* __KERNEL__ */
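The _TIF_ALLWORK_MASK change from 0x8000ffff to 0xc000ffff adds exactly bit 30, the new _TIF_KERNEL_TRACE, alongside bit 31 (_TIF_SYSCALL_TRACE): 0x8000ffff | (1 << 30) == 0xc000ffff. A compile-time assertion along these lines (hypothetical, not in the patch, placed in any function body) would document that relationship:

        BUILD_BUG_ON((0x8000ffff | _TIF_KERNEL_TRACE) != 0xc000ffff);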
diff --git a/arch/mips/include/asm/timex.h b/arch/mips/include/asm/timex.h
index 6529704aa73..6c150979fa2 100644
--- a/arch/mips/include/asm/timex.h
+++ b/arch/mips/include/asm/timex.h
@@ -20,6 +20,8 @@
*/
#define CLOCK_TICK_RATE 1193182
+extern unsigned int mips_hpt_frequency;
+
/*
* Standard way to access the cycle counter.
* Currently only used on SMP for scheduling.
@@ -29,14 +31,109 @@
* which isn't an evil thing.
*
* We know that all SMP capable CPUs have cycle counters.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ * HAVE_GET_CYCLES makes sure that this case is handled properly:
+ *
+ * Ralf Baechle <ralf@linux-mips.org>:
+ * This avoids us executing an mfc0 c0_count instruction on processors which
+ * don't have one, but also on certain R4000 and R4400 versions where reading
+ * from the count register just at the very moment when its value equals
+ * c0_compare will result in the timer interrupt getting lost.
*/
+#ifdef CONFIG_HAVE_GET_CYCLES
+# ifdef CONFIG_CPU_CAVIUM_OCTEON
+typedef unsigned long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+ return read_c0_cvmcount();
+}
+
+static inline void get_cycles_barrier(void)
+{
+}
+
+static inline cycles_t get_cycles_rate(void)
+{
+ return mips_hpt_frequency;
+}
+
+extern int test_tsc_synchronization(void);
+extern int _tsc_is_sync;
+static inline int tsc_is_sync(void)
+{
+ return _tsc_is_sync;
+}
+# else /* #ifdef CONFIG_CPU_CAVIUM_OCTEON */
+# error "64-bit get_cycles() supported only on Cavium Octeon MIPS architectures"
+# endif /* #else #ifdef CONFIG_CPU_CAVIUM_OCTEON */
+#elif defined(CONFIG_HAVE_GET_CYCLES_32)
typedef unsigned int cycles_t;
static inline cycles_t get_cycles(void)
{
+ return read_c0_count();
+}
+
+static inline void get_cycles_barrier(void)
+{
+}
+
+static inline cycles_t get_cycles_rate(void)
+{
+ return mips_hpt_frequency;
+}
+
+extern int test_tsc_synchronization(void);
+extern int _tsc_is_sync;
+static inline int tsc_is_sync(void)
+{
+ return _tsc_is_sync;
+}
+#else
+typedef unsigned int cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+ return 0;
+}
+static inline int test_tsc_synchronization(void)
+{
return 0;
}
+static inline int tsc_is_sync(void)
+{
+ return 0;
+}
+#endif
+
+#define DELAY_INTERRUPT 100
+/*
+ * Only updates 32 LSB.
+ */
+static inline void write_tsc(u32 val1, u32 val2)
+{
+ write_c0_count(val1);
+ /* Arrange for an interrupt in a short while */
+ write_c0_compare(read_c0_count() + DELAY_INTERRUPT);
+}
+
+/*
+ * Currently unused, should update internal tsc-related timekeeping sources.
+ */
+static inline void mark_tsc_unstable(char *reason)
+{
+}
+
+/*
+ * Currently simply use the tsc_is_sync value.
+ */
+static inline int unsynchronized_tsc(void)
+{
+ return !tsc_is_sync();
+}
#endif /* __KERNEL__ */
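The get_cycles()/get_cycles_rate()/get_cycles_barrier() triplet added above is the usual building block for cheap interval measurement. A hedged sketch of a caller, assuming kernel context and an interval short enough that a 32-bit cycles_t wraps at most once:

        /* Hypothetical measurement helper using the API above. */
        static u64 measure_ns(void (*fn)(void))
        {
                cycles_t start, stop;

                get_cycles_barrier();   /* keep fn() from moving across the read */
                start = get_cycles();
                fn();
                stop = get_cycles();
                get_cycles_barrier();

                /* Unsigned subtraction handles a single 32-bit wrap. */
                return (u64)(stop - start) * NSEC_PER_SEC / get_cycles_rate();
        }

Note that get_cycles_barrier() is empty on MIPS; the calls just mark where a serializing barrier would go on architectures that provide one.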
diff --git a/arch/mips/include/asm/trace-clock.h b/arch/mips/include/asm/trace-clock.h
new file mode 100644
index 00000000000..9bbcf999bef
--- /dev/null
+++ b/arch/mips/include/asm/trace-clock.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock MIPS definitions.
+ */
+
+#ifndef _ASM_MIPS_TRACE_CLOCK_H
+#define _ASM_MIPS_TRACE_CLOCK_H
+
+#include <linux/timex.h>
+#include <asm/processor.h>
+
+#define TRACE_CLOCK_MIN_PROBE_DURATION 200
+
+#ifdef CONFIG_CPU_CAVIUM_OCTEON
+# include <asm/octeon/trace-clock.h>
+#else /* !CONFIG_CPU_CAVIUM_OCTEON */
+/*
+ * Number of hardware clock bits. The higher order bits are expected to be 0.
+ * If the hardware clock source has more than 32 bits, the bits higher than the
+ * 32nd will be truncated by a cast to a 32-bit unsigned. Range: 1 - 32.
+ * (too few bits would be unrealistic though, since we depend on the timer to
+ * detect the overflows).
+ */
+#define TC_HW_BITS 32
+
+/* Expected maximum interrupt latency in ms: 15 ms, *2 as a safety margin */
+#define TC_EXPECTED_INTERRUPT_LATENCY 30
+
+extern u64 trace_clock_read_synthetic_tsc(void);
+
+/*
+ * MIPS get_cycles only returns a 32-bit TSC (see timex.h). The assumption
+ * there is that the reschedule is done every 8 seconds or so. Given that
+ * tracing needs to detect delays longer than 8 seconds, we need a full 64-bit
+ * TSC, which is provided by trace-clock-32-to-64.
+ */
+
+static inline u32 trace_clock_read32(void)
+{
+ return (u32)get_cycles(); /* only need the 32 LSB */
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return trace_clock_read_synthetic_tsc();
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return get_cycles_rate();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+extern void get_synthetic_tsc(void);
+extern void put_synthetic_tsc(void);
+
+static inline int get_trace_clock(void)
+{
+ get_synthetic_tsc();
+ return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+ put_synthetic_tsc();
+}
+#endif /* CONFIG_CPU_CAVIUM_OCTEON */
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_MIPS_TRACE_CLOCK_H */
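trace-clock-32-to-64 itself is not in this hunk, but the technique it implements is straightforward: keep a 64-bit shadow value and, whenever the 32-bit hardware count is observed to be lower than the previous low word, carry into the high word. The TC_EXPECTED_INTERRUPT_LATENCY budget above exists so an update is guaranteed at least once per wrap. A simplified single-reader sketch (the real code uses per-CPU, latch-style structures):

        /* Sketch only: software extension of a 32-bit free-running counter. */
        struct synthetic_tsc {
                u64 last;       /* last 64-bit value handed out */
        };

        static u64 synthetic_read(struct synthetic_tsc *s, u32 hw)
        {
                u64 msb = s->last & ~0xffffffffULL;

                if (hw < (u32)s->last)  /* counter wrapped since last update */
                        msb += 0x100000000ULL;
                s->last = msb | hw;
                return s->last;
        }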
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index ffa331029e0..8c5410f97e7 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -167,7 +167,7 @@ work_notifysig: # deal with pending signals and
FEXPORT(syscall_exit_work_partial)
SAVE_STATIC
syscall_exit_work:
- li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ li t0, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE
and t0, a2 # a2 is preloaded with TI_FLAGS
beqz t0, work_pending # trace bit set?
local_irq_enable # could let do_syscall_trace()
diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c
index 876a75cc376..76a82609626 100644
--- a/arch/mips/kernel/linux32.c
+++ b/arch/mips/kernel/linux32.c
@@ -34,6 +34,7 @@
#include <linux/vfs.h>
#include <linux/ipc.h>
#include <linux/slab.h>
+#include <trace/ipc.h>
#include <net/sock.h>
#include <net/scm.h>
@@ -44,6 +45,8 @@
#include <asm/mmu_context.h>
#include <asm/mman.h>
+DEFINE_TRACE(ipc_call);
+
/* Use this to get at 32-bit user passed pointers. */
/* A() macro should be used for places where you e.g.
have some internal variable u32 and just want to get
@@ -166,6 +169,8 @@ SYSCALL_DEFINE6(32_ipc, u32, call, long, first, long, second, long, third,
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
+ trace_ipc_call(call, first);
+
switch (call) {
case SEMOP:
/* struct sembuf is the same on 32 and 64bit :)) */
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index ae167df73dd..7d9bb1cdd7f 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -25,6 +25,7 @@
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/random.h>
+#include <trace/sched.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
@@ -42,6 +43,8 @@
#include <asm/inst.h>
#include <asm/stacktrace.h>
+DEFINE_TRACE(sched_kthread_create);
+
/*
* The idle thread. There's no useful work to be done, so just try to conserve
* power and have a low exit latency (ie sit in a loop waiting for somebody to
@@ -234,6 +237,7 @@ static void __noreturn kernel_thread_helper(void *arg, int (*fn)(void *))
long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
+ long pid;
memset(&regs, 0, sizeof(regs));
@@ -249,7 +253,10 @@ long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
#endif
/* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+ 0, &regs, 0, NULL, NULL);
+ trace_sched_kthread_create(fn, pid);
+ return pid;
}
/*
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index d21c388c011..79e1750cc7c 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -25,6 +25,7 @@
#include <linux/security.h>
#include <linux/audit.h>
#include <linux/seccomp.h>
+#include <trace/syscall.h>
#include <asm/byteorder.h>
#include <asm/cpu.h>
@@ -39,6 +40,9 @@
#include <asm/bootinfo.h>
#include <asm/reg.h>
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
/*
* Called by kernel/ptrace.c when detaching..
*
@@ -535,6 +539,11 @@ static inline int audit_arch(void)
*/
asmlinkage void do_syscall_trace(struct pt_regs *regs, int entryexit)
{
+ if (!entryexit)
+ trace_syscall_entry(regs, regs->regs[2]);
+ else
+ trace_syscall_exit(regs->regs[2]);
+
/* do the secure computing check first */
if (!entryexit)
secure_computing(regs->regs[2]);
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index fbaabad0e6e..1b90e8255da 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -52,7 +52,7 @@ NESTED(handle_sys, PT_SIZE, sp)
stack_done:
lw t0, TI_FLAGS($28) # syscall tracing enabled?
- li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE
and t0, t1
bnez t0, syscall_trace_entry # -> yes
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index 3f417928320..c574a1a12f2 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -54,7 +54,7 @@ NESTED(handle_sys64, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
- li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
bnez t0, syscall_trace_entry
@@ -126,7 +126,8 @@ illegal_syscall:
END(handle_sys64)
.align 3
-sys_call_table:
+ .type sys_call_table,@object
+EXPORT(sys_call_table)
PTR sys_read /* 5000 */
PTR sys_write
PTR sys_open
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index f08ece6d8ac..0d312c2d54d 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -53,7 +53,7 @@ NESTED(handle_sysn32, PT_SIZE, sp)
sd a3, PT_R26(sp) # save a3 for syscall restarting
- li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
bnez t0, n32_syscall_trace_entry
@@ -121,6 +121,8 @@ not_n32_scall:
END(handle_sysn32)
+ .align 3
+ .type sysn32_call_table,@object
EXPORT(sysn32_call_table)
PTR sys_read /* 6000 */
PTR sys_write
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index 78d768a3e19..635d0d84344 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -53,7 +53,7 @@ NESTED(handle_sys, PT_SIZE, sp)
sll a3, a3, 0
dsll t0, v0, 3 # offset into table
- ld t2, (sys_call_table - (__NR_O32_Linux * 8))(t0)
+ ld t2, (syso32_call_table - (__NR_O32_Linux * 8))(t0)
sd a3, PT_R26(sp) # save a3 for syscall restarting
@@ -81,7 +81,7 @@ NESTED(handle_sys, PT_SIZE, sp)
PTR 4b, bad_stack
.previous
- li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT
+ li t1, _TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_KERNEL_TRACE
LONG_L t0, TI_FLAGS($28) # syscall tracing enabled?
and t0, t1, t0
bnez t0, trace_a_syscall
@@ -180,7 +180,7 @@ LEAF(sys32_syscall)
beqz t0, einval # do not recurse
dsll t1, t0, 3
beqz v0, einval
- ld t2, sys_call_table(t1) # syscall routine
+ ld t2, syso32_call_table(t1) # syscall routine
move a0, a1 # shift argument registers
move a1, a2
@@ -202,8 +202,8 @@ einval: li v0, -ENOSYS
END(sys32_syscall)
.align 3
- .type sys_call_table,@object
-sys_call_table:
+ .type syso32_call_table,@object
+EXPORT(syso32_call_table)
PTR sys32_syscall /* 4000 */
PTR sys_exit
PTR sys_fork
diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c
index 383aeb95cb4..3faf9d20ee6 100644
--- a/arch/mips/kernel/smp.c
+++ b/arch/mips/kernel/smp.c
@@ -164,6 +164,9 @@ void __init smp_cpus_done(unsigned int max_cpus)
{
mp_ops->cpus_done();
synchronise_count_master();
+#ifdef CONFIG_HAVE_UNSYNCHRONIZED_TSC
+ test_tsc_synchronization();
+#endif
}
/* called from main before smp_init() */
diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c
index 1dc6edff45e..9965dedbcc1 100644
--- a/arch/mips/kernel/syscall.c
+++ b/arch/mips/kernel/syscall.c
@@ -31,6 +31,8 @@
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/ipc.h>
+#include <linux/kallsyms.h>
#include <asm/asm.h>
#include <asm/branch.h>
@@ -464,3 +466,67 @@ int kernel_execve(const char *filename,
return -__v0;
}
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+#ifdef CONFIG_32BIT
+ for (i = 0; i < __NR_O32_Linux_syscalls; i++) {
+ extern struct {
+ unsigned long ptr;
+ long j;
+ } sys_call_table[];
+
+ sprint_symbol(namebuf, sys_call_table[i].ptr);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i + __NR_O32_Linux, (void *)sys_call_table[i].ptr,
+ namebuf);
+ }
+#endif
+#ifdef CONFIG_64BIT
+# ifdef CONFIG_MIPS32_O32
+ for (i = 0; i < __NR_O32_Linux_syscalls; i++) {
+ extern unsigned long syso32_call_table[];
+
+ sprint_symbol(namebuf, syso32_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i + __NR_O32_Linux, (void *)syso32_call_table[i],
+ namebuf);
+ }
+# endif
+
+ for (i = 0; i < __NR_64_Linux_syscalls; i++) {
+ extern unsigned long sys_call_table[];
+
+ sprint_symbol(namebuf, sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i + __NR_64_Linux, (void *)sys_call_table[i],
+ namebuf);
+ }
+
+# ifdef CONFIG_MIPS32_N32
+ for (i = 0; i < __NR_N32_Linux_syscalls; i++) {
+ extern unsigned long sysn32_call_table[];
+
+ sprint_symbol(namebuf, sysn32_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i + __NR_N32_Linux, (void *)sysn32_call_table[i],
+ namebuf);
+ }
+# endif
+#endif
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+
+void ltt_dump_idt_table(void *call_data)
+{
+ /* No IDT information yet. */
+ return;
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index fb749740551..51561a75dcf 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq);
*/
unsigned int mips_hpt_frequency;
+EXPORT_SYMBOL(mips_hpt_frequency);
/*
* This function exists in order to cause an error due to a duplicate
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 71350f7f2d8..b6a12d70e8c 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -30,6 +30,7 @@
#include <linux/kdb.h>
#include <linux/irq.h>
#include <linux/perf_event.h>
+#include <trace/trap.h>
#include <asm/bootinfo.h>
#include <asm/branch.h>
@@ -55,6 +56,12 @@
#include <asm/stacktrace.h>
#include <asm/uasm.h>
+/*
+ * Also used in unaligned.c and fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
extern void check_wait(void);
extern asmlinkage void r4k_wait(void);
extern asmlinkage void rollback_handle_int(void);
@@ -321,7 +328,7 @@ static void __show_regs(const struct pt_regs *regs)
printk("Cause : %08x\n", cause);
- cause = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
+ cause = CAUSE_EXCCODE(cause);
if (1 <= cause && cause <= 5)
printk("BadVA : %0*lx\n", field, regs->cp0_badvaddr);
@@ -698,6 +705,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
return;
die_if_kernel("FP exception in kernel code", regs);
+ trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause));
if (fcr31 & FPU_CSR_UNI_X) {
int sig;
void __user *fault_addr = NULL;
@@ -730,7 +738,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
/* If something went wrong, signal */
process_fpemu_return(sig, fault_addr);
-
+ trace_trap_exit();
return;
} else if (fcr31 & FPU_CSR_INV_X)
info.si_code = FPE_FLTINV;
@@ -748,6 +756,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
info.si_errno = 0;
info.si_addr = (void __user *) regs->cp0_epc;
force_sig_info(SIGFPE, &info, current);
+ trace_trap_exit();
}
static void do_trap_or_bp(struct pt_regs *regs, unsigned int code,
@@ -979,6 +988,8 @@ asmlinkage void do_cpu(struct pt_regs *regs)
int status;
unsigned long __maybe_unused flags;
+ trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause));
+
die_if_kernel("do_cpu invoked from kernel context!", regs);
cpid = (regs->cp0_cause >> CAUSEB_CE) & 3;
@@ -990,8 +1001,10 @@ asmlinkage void do_cpu(struct pt_regs *regs)
opcode = 0;
status = -1;
- if (unlikely(compute_return_epc(regs) < 0))
+ if (unlikely(compute_return_epc(regs) < 0)) {
+ trace_trap_exit();
return;
+ }
if (unlikely(get_user(opcode, epc) < 0))
status = SIGSEGV;
@@ -1009,7 +1022,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
regs->cp0_epc = old_epc; /* Undo skip-over. */
force_sig(status, current);
}
-
+ trace_trap_exit();
return;
case 1:
@@ -1029,11 +1042,12 @@ asmlinkage void do_cpu(struct pt_regs *regs)
if (!process_fpemu_return(sig, fault_addr))
mt_ase_fp_affinity();
}
-
+ trace_trap_exit();
return;
case 2:
raw_notifier_call_chain(&cu2_chain, CU2_EXCEPTION, regs);
+ trace_trap_exit();
return;
case 3:
@@ -1041,6 +1055,7 @@ asmlinkage void do_cpu(struct pt_regs *regs)
}
force_sig(SIGILL, current);
+ trace_trap_exit();
}
asmlinkage void do_mdmx(struct pt_regs *regs)
diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c
index cfea1adfa15..d3af94de240 100644
--- a/arch/mips/kernel/unaligned.c
+++ b/arch/mips/kernel/unaligned.c
@@ -79,6 +79,7 @@
#include <linux/sched.h>
#include <linux/debugfs.h>
#include <linux/perf_event.h>
+#include <trace/trap.h>
#include <asm/asm.h>
#include <asm/branch.h>
@@ -518,6 +519,7 @@ asmlinkage void do_ade(struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS,
1, 0, regs, regs->cp0_badvaddr);
+ trace_trap_entry(regs, CAUSE_EXCCODE(regs->cp0_cause));
/*
* Did we catch a fault trying to load an instruction?
* Or are we running in MIPS16 mode?
@@ -543,6 +545,8 @@ asmlinkage void do_ade(struct pt_regs *regs)
emulate_load_store_insn(regs, (void __user *)regs->cp0_badvaddr, pc);
set_fs(seg);
+ trace_trap_exit();
+
return;
sigbus:
@@ -552,6 +556,8 @@ sigbus:
/*
* XXX On return from the signal handler we should advance the epc
*/
+
+ trace_trap_exit();
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index 137ee76a004..1a5bd7b9018 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -19,6 +19,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
+#include <trace/fault.h>
#include <asm/branch.h>
#include <asm/mmu_context.h>
@@ -28,6 +29,9 @@
#include <asm/highmem.h> /* For VMALLOC_END */
#include <linux/kdebug.h>
+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -144,7 +148,10 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
+ trace_page_fault_entry(regs, CAUSE_EXCCODE(regs->cp0_cause), mm, vma,
+ address, write);
fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+ trace_page_fault_exit(fault);
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
diff --git a/arch/parisc/include/asm/thread_info.h b/arch/parisc/include/asm/thread_info.h
index aa8de727e90..cfcb2e70eba 100644
--- a/arch/parisc/include/asm/thread_info.h
+++ b/arch/parisc/include/asm/thread_info.h
@@ -62,6 +62,7 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 8 /* callback before returning to user */
#define TIF_SINGLESTEP 9 /* single stepping? */
#define TIF_BLOCKSTEP 10 /* branch stepping? */
+#define TIF_KERNEL_TRACE 11 /* kernel trace active */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -73,6 +74,7 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
_TIF_NEED_RESCHED | _TIF_RESTORE_SIGMASK)
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7d69e9bf5e6..b13eeab289a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -117,11 +117,13 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_KPROBES
+ select HAVE_TRACE_CLOCK
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
select HAVE_DMA_ATTRS
+ select HAVE_GET_CYCLES if PPC64
select HAVE_DMA_API_DEBUG
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_OPROFILE
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 65eb85976a0..34ba6e0a4d8 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -100,7 +100,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
TIF_NEED_RESCHED */
#define TIF_32BIT 4 /* 32 bit binary */
-#define TIF_PERFMON_WORK 5 /* work for pfm_handle_work() */
+#define TIF_KERNEL_TRACE 5 /* kernel trace active */
#define TIF_PERFMON_CTXSW 6 /* perfmon needs ctxsw calls */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SINGLESTEP 8 /* singlestepping active */
@@ -111,6 +111,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_NOTIFY_RESUME 13 /* callback before returning to user */
#define TIF_FREEZE 14 /* Freezing for suspend */
#define TIF_RUNLATCH 15 /* Is the runlatch enabled? */
+#define TIF_PERFMON_WORK 16 /* work for pfm_handle_work() */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
@@ -118,7 +119,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_32BIT (1<<TIF_32BIT)
-#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_PERFMON_CTXSW (1<<TIF_PERFMON_CTXSW)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
@@ -128,7 +129,8 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_FREEZE (1<<TIF_FREEZE)
#define _TIF_RUNLATCH (1<<TIF_RUNLATCH)
-#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
+#define _TIF_PERFMON_WORK (1<<TIF_PERFMON_WORK)
+#define _TIF_SYSCALL_T_OR_A (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP|_TIF_KERNEL_TRACE)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME)
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index c55e14f7ef4..2fe7460cbf9 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -14,6 +14,8 @@
typedef unsigned long cycles_t;
+extern unsigned long tb_ticks_per_sec;
+
static inline cycles_t get_cycles(void)
{
#ifdef __powerpc64__
@@ -46,5 +48,15 @@ static inline cycles_t get_cycles(void)
#endif
}
+static inline cycles_t get_cycles_rate(void)
+{
+ return tb_ticks_per_sec;
+}
+
+static inline void get_cycles_barrier(void)
+{
+ isync();
+}
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_TIMEX_H */
diff --git a/arch/powerpc/include/asm/trace-clock.h b/arch/powerpc/include/asm/trace-clock.h
new file mode 100644
index 00000000000..05facc3e372
--- /dev/null
+++ b/arch/powerpc/include/asm/trace-clock.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2005,2008 Mathieu Desnoyers
+ *
+ * Trace clock PowerPC definitions.
+ *
+ * Use get_tb() directly to ensure reading a 64-bit value on 32-bit PowerPC.
+ */
+
+#ifndef _ASM_TRACE_CLOCK_H
+#define _ASM_TRACE_CLOCK_H
+
+#include <linux/timex.h>
+#include <linux/time.h>
+#include <asm/time.h>
+
+static inline u32 trace_clock_read32(void)
+{
+ return get_tbl();
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return get_tb();
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return get_cycles_rate();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+static inline int get_trace_clock(void)
+{
+ return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+}
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_TRACE_CLOCK_H */
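On 32-bit PowerPC the timebase is read as two 32-bit halves, so a 64-bit read must guard against the upper half incrementing between the two reads; this is why the header comment insists on get_tb(). The kernel's get_tb() (in asm/time.h) uses the classic retry loop, roughly:

        /* Sketch of the split-timebase read technique. */
        static inline u64 read_tb64(void)
        {
                u32 hi, lo, hi2;

                do {
                        asm volatile("mftbu %0" : "=r" (hi));
                        asm volatile("mftb %0" : "=r" (lo));
                        asm volatile("mftbu %0" : "=r" (hi2));
                } while (hi != hi2);    /* upper half moved: retry */

                return ((u64)hi << 32) | lo;
        }

On 64-bit parts a single mftb read is atomic, which is why trace_clock_read32() can simply use get_tbl().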
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index cbe2297d68b..d1c27723f84 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -7,7 +7,7 @@
#include <linux/tracepoint.h>
struct pt_regs;
-
+#if 0 /* disabled by Mathieu Desnoyers. Belongs to generic IRQS. */
TRACE_EVENT(irq_entry,
TP_PROTO(struct pt_regs *regs),
@@ -41,6 +41,7 @@ TRACE_EVENT(irq_exit,
TP_printk("pt_regs=%p", __entry->regs)
);
+#endif //0
TRACE_EVENT(timer_interrupt_entry,
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index ce557f6f00f..d21cf5bc503 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -401,8 +401,6 @@ void do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
unsigned int irq;
- trace_irq_entry(regs);
-
irq_enter();
check_stack_overflow();
@@ -425,8 +423,6 @@ void do_IRQ(struct pt_regs *regs)
timer_interrupt(regs);
}
#endif
-
- trace_irq_exit(regs);
}
void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd9821ad..8294f73feac 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -665,7 +665,7 @@ _GLOBAL(abs)
* Create a kernel thread
* kernel_thread(fn, arg, flags)
*/
-_GLOBAL(kernel_thread)
+_GLOBAL(original_kernel_thread)
stwu r1,-16(r1)
stw r30,8(r1)
stw r31,12(r1)
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 206a321a71d..1e10e579922 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -415,7 +415,7 @@ _GLOBAL(scom970_write)
* Create a kernel thread
* kernel_thread(fn, arg, flags)
*/
-_GLOBAL(kernel_thread)
+_GLOBAL(original_kernel_thread)
std r29,-24(r1)
std r30,-16(r1)
stdu r1,-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index ef3ef566235..046b16e4a57 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -161,6 +161,9 @@ EXPORT_SYMBOL(screen_info);
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(timer_interrupt);
+#ifndef CONFIG_SPARSE_IRQ
+EXPORT_SYMBOL(irq_desc);
+#endif
EXPORT_SYMBOL(tb_ticks_per_jiffy);
EXPORT_SYMBOL(cacheable_memcpy);
EXPORT_SYMBOL(cacheable_memzero);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 8303a6c65ef..0a85886a784 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -38,6 +38,7 @@
#include <linux/personality.h>
#include <linux/random.h>
#include <linux/hw_breakpoint.h>
+#include <trace/sched.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
@@ -55,6 +56,8 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+DEFINE_TRACE(sched_kthread_create);
+
extern unsigned long _get_SP(void);
#ifndef CONFIG_SMP
@@ -663,6 +666,17 @@ void show_regs(struct pt_regs * regs)
show_instructions(regs);
}
+long original_kernel_thread(int (*fn) (void *), void *arg, unsigned long flags);
+
+long kernel_thread(int (*fn) (void *), void *arg, unsigned long flags)
+{
+ long retval;
+
+ retval = original_kernel_thread(fn, arg, flags);
+ trace_sched_kthread_create(fn, retval);
+ return retval;
+}
+
void exit_thread(void)
{
discard_lazy_cpu_state();
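The interposition pattern used here, renaming the assembly implementation to original_kernel_thread in misc_32.S/misc_64.S and reclaiming the public kernel_thread name with a C wrapper that calls through and then emits trace_sched_kthread_create(), is a standard way to instrument an entry point that is awkward to hook in assembly. Contrast the MIPS side of this merge, where kernel_thread() is already C and the tracepoint was simply added inline.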
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 90653699829..fb8924c5fdf 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -34,12 +34,16 @@
#endif
#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
+#include <trace/syscall.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/system.h>
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
/*
* The parameter save area on the stack is used to store arguments being passed
* to callee function and is located at fixed offset from stack pointer.
@@ -1680,6 +1684,8 @@ long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
+ trace_syscall_entry(regs, regs->gpr[0]);
+
secure_computing(regs->gpr[0]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
@@ -1715,6 +1721,8 @@ void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
+ trace_syscall_exit(regs->result);
+
if (unlikely(current->audit_context))
audit_syscall_exit((regs->ccr&0x10000000)?AUDITSC_FAILURE:AUDITSC_SUCCESS,
regs->result);
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 4e5bf1edc0f..5fe3cb1f38b 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -41,6 +41,7 @@
#include <linux/elf.h>
#include <linux/ipc.h>
#include <linux/slab.h>
+#include <trace/ipc.h>
#include <asm/ptrace.h>
#include <asm/types.h>
@@ -51,6 +52,7 @@
#include <asm/ppc-pci.h>
#include <asm/syscalls.h>
+DEFINE_TRACE(ipc_call);
asmlinkage long ppc32_select(u32 n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
@@ -79,6 +81,8 @@ long compat_sys_ipc(u32 call, u32 first, u32 second, u32 third, compat_uptr_t pt
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
+ trace_ipc_call(call, first);
+
switch (call) {
case SEMTIMEDOP:
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 09d31dbf43f..fcbe3f5c074 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -54,6 +54,7 @@
#include <linux/irq.h>
#include <linux/delay.h>
#include <linux/irq_work.h>
+#include <trace/trap.h>
#include <asm/trace.h>
#include <asm/io.h>
@@ -585,6 +586,8 @@ void timer_interrupt(struct pt_regs * regs)
 * some CPUs will continue to take decrementer exceptions */
set_dec(DECREMENTER_MAX);
+ trace_trap_entry(regs, regs->trap);
+
#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
if (atomic_read(&ppc_n_lost_interrupts) != 0)
do_IRQ(regs);
@@ -631,6 +634,7 @@ void timer_interrupt(struct pt_regs * regs)
set_irq_regs(old_regs);
trace_timer_interrupt_exit(regs);
+ trace_trap_exit();
}
#ifdef CONFIG_SUSPEND
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index bd74fac169b..62ae8cad792 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -34,6 +34,8 @@
#include <linux/bug.h>
#include <linux/kdebug.h>
#include <linux/debugfs.h>
+#include <linux/ltt-core.h>
+#include <trace/trap.h>
#include <asm/emulated_ops.h>
#include <asm/pgtable.h>
@@ -75,6 +77,12 @@ EXPORT_SYMBOL(__debugger_fault_handler);
#endif
/*
+ * Also used in time.c and fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
+/*
* Trap & Exception support
*/
@@ -141,6 +149,10 @@ int die(const char *str, struct pt_regs *regs, long err)
#ifdef CONFIG_NUMA
printk("NUMA ");
#endif
+#ifdef CONFIG_LTT
+ printk("LTT NESTING LEVEL : %u ", __get_cpu_var(ltt_nesting));
+ printk("\n");
+#endif
printk("%s\n", ppc_md.name ? ppc_md.name : "");
sysfs_printk_last_file();
@@ -204,11 +216,14 @@ void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr)
addr, regs->nip, regs->link, code);
}
+ trace_trap_entry(regs, regs->trap);
+
memset(&info, 0, sizeof(info));
info.si_signo = signr;
info.si_code = code;
info.si_addr = (void __user *) addr;
force_sig_info(signr, &info, current);
+ trace_trap_exit();
}
#ifdef CONFIG_PPC64
@@ -1087,7 +1102,9 @@ void performance_monitor_exception(struct pt_regs *regs)
{
__get_cpu_var(irq_stat).pmu_irqs++;
+ trace_trap_entry(regs, regs->trap);
perf_irq(regs);
+ trace_trap_exit();
}
#ifdef CONFIG_8xx
@@ -1308,12 +1325,14 @@ void altivec_assist_exception(struct pt_regs *regs)
/* got an error reading the instruction */
_exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip);
} else {
+ trace_trap_entry(regs, regs->trap);
/* didn't recognize the instruction */
/* XXX quick hack for now: set the non-Java bit in the VSCR */
if (printk_ratelimit())
printk(KERN_ERR "Unrecognized altivec instruction "
"in %s at %lx\n", current->comm, regs->nip);
current->thread.vscr.u[3] |= 0x10000;
+ trace_trap_exit();
}
}
#endif /* CONFIG_ALTIVEC */
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 54f4fb994e9..2fb3d0ce222 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -31,6 +31,7 @@
#include <linux/kdebug.h>
#include <linux/perf_event.h>
#include <linux/magic.h>
+#include <trace/fault.h>
#include <asm/firmware.h>
#include <asm/page.h>
@@ -43,6 +44,9 @@
#include <asm/siginfo.h>
#include <mm/mmu_decl.h>
+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+
#ifdef CONFIG_KPROBES
static inline int notify_page_fault(struct pt_regs *regs)
{
@@ -309,7 +313,9 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
+ trace_page_fault_entry(regs, regs->trap, mm, vma, address, is_write);
ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
+ trace_page_fault_exit(ret);
if (unlikely(ret & VM_FAULT_ERROR)) {
if (ret & VM_FAULT_OOM)
goto out_of_memory;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 3c7c3f82d84..3f098525259 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -30,6 +30,7 @@
#include <linux/ptrace.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
+#include <linux/marker.h>
#include <asm/io.h>
#include <asm/time.h>
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 0b046628493..a7b4b883863 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -40,6 +40,7 @@
#include <linux/pid_namespace.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/marker.h>
#include <asm/io.h>
#include <asm/mmu_context.h>
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index ad1382f7932..637ce13517a 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -94,6 +94,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
#define TIF_SECCOMP 10 /* secure computing */
#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
+#define TIF_KERNEL_TRACE 12 /* kernel trace active */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling
TIF_NEED_RESCHED */
#define TIF_31BIT 17 /* 32bit process */
@@ -113,6 +114,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_31BIT (1<<TIF_31BIT)
#define _TIF_SINGLE_STEP (1<<TIF_FREEZE)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 648f64239a9..831874ffbc6 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -52,7 +52,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
- _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+ _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8 | \
+ _TIF_KERNEL_TRACE>>8)
STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 9d3603d6c51..7b5255e3b94 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -55,7 +55,8 @@ _TIF_WORK_SVC = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
_TIF_MCCK_PENDING)
_TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
- _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8)
+ _TIF_SECCOMP>>8 | _TIF_SYSCALL_TRACEPOINT>>8 | \
+ _TIF_KERNEL_TRACE>>8)
#define BASED(name) name-system_call(%r13)
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index ef86ad24398..df0af62a76d 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -45,6 +45,9 @@ enum s390_regset {
REGSET_GENERAL_EXTENDED,
};
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
void update_per_regs(struct task_struct *task)
{
static const struct per_regs per_single_step = {
@@ -723,6 +726,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
/* Do the secure computing check first. */
secure_computing(regs->gprs[2]);
+ trace_syscall_entry(regs, regs->gprs[2]);
/*
* The sysc_tracesys code in entry.S stored the system
* call number to gprs[2].
@@ -753,6 +757,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs)
{
+ trace_syscall_exit(regs->gprs[2]);
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->gprs[2]),
regs->gprs[2]);
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index 476081440df..dcc9d509af0 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -29,9 +29,12 @@
#include <linux/personality.h>
#include <linux/unistd.h>
#include <linux/ipc.h>
+#include <trace/ipc.h>
#include <asm/uaccess.h>
#include "entry.h"
+DEFINE_TRACE(ipc_call);
+
/*
* Perform the mmap() system call. Linux for S/390 isn't able to handle more
* than 5 system call parameters, so this system call uses a memory block
@@ -70,6 +73,8 @@ SYSCALL_DEFINE5(s390_ipc, uint, call, int, first, unsigned long, second,
struct ipc_kludge tmp;
int ret;
+ trace_ipc_call(call, first);
+
switch (call) {
case SEMOP:
return sys_semtimedop(first, (struct sembuf __user *)ptr,
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index b5a4a739b47..ff0dc02adc3 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -5,6 +5,7 @@
* Copyright (C) 1999,2000 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com),
* Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com),
+ * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation
*
* Derived from "arch/i386/kernel/traps.c"
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -33,6 +34,7 @@
#include <linux/kprobes.h>
#include <linux/bug.h>
#include <linux/utsname.h>
+#include <trace/trap.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -65,6 +67,12 @@ static int kstack_depth_to_print = 20;
#endif /* CONFIG_64BIT */
/*
+ * Also used in fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
+/*
 * For show_trace we have three different stacks to consider:
* - the panic stack which is used if the kernel stack has overflown
* - the asynchronous interrupt stack (cpu related)
@@ -299,6 +307,8 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str,
pgm_int_code, signr) == NOTIFY_STOP)
return;
+ trace_trap_entry(regs, pgm_int_code & 0xffff);
+
if (regs->psw.mask & PSW_MASK_PSTATE) {
struct task_struct *tsk = current;
@@ -314,11 +324,14 @@ static inline void __kprobes do_trap(long pgm_int_code, int signr, char *str,
enum bug_trap_type btt;
btt = report_bug(regs->psw.addr & PSW_ADDR_INSN, regs);
- if (btt == BUG_TRAP_TYPE_WARN)
+ if (btt == BUG_TRAP_TYPE_WARN) {
+ trace_trap_exit();
return;
+ }
die(str, regs, pgm_int_code);
}
}
+ trace_trap_exit();
}
static inline void __user *get_psw_address(struct pt_regs *regs,
@@ -422,9 +435,11 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code,
location = get_psw_address(regs, pgm_int_code);
+ trace_trap_entry(regs, pgm_int_code & 0xffff);
+
if (regs->psw.mask & PSW_MASK_PSTATE) {
if (get_user(*((__u16 *) opcode), (__u16 __user *) location))
- return;
+ goto end;
if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) {
if (tracehook_consider_fatal_signal(current, SIGTRAP))
force_sig(SIGTRAP, current);
@@ -433,24 +448,24 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code,
#ifdef CONFIG_MATHEMU
} else if (opcode[0] == 0xb3) {
if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
+ goto end;
signal = math_emu_b3(opcode, regs);
} else if (opcode[0] == 0xed) {
if (get_user(*((__u32 *) (opcode+2)),
(__u32 __user *)(location+1)))
- return;
+ goto end;
signal = math_emu_ed(opcode, regs);
} else if (*((__u16 *) opcode) == 0xb299) {
if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
+ goto end;
signal = math_emu_srnm(opcode, regs);
} else if (*((__u16 *) opcode) == 0xb29c) {
if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
+ goto end;
signal = math_emu_stfpc(opcode, regs);
} else if (*((__u16 *) opcode) == 0xb29d) {
if (get_user(*((__u16 *) (opcode+2)), location+1))
- return;
+ goto end;
signal = math_emu_lfpc(opcode, regs);
#endif
} else
@@ -486,6 +501,8 @@ static void __kprobes illegal_op(struct pt_regs *regs, long pgm_int_code,
do_trap(pgm_int_code, signal,
"illegal operation", regs, &info);
}
+end:
+ trace_trap_exit();
}
@@ -500,6 +517,8 @@ asmlinkage void specification_exception(struct pt_regs *regs,
location = (__u16 __user *) get_psw_address(regs, pgm_int_code);
+ trace_trap_entry(regs, pgm_int_code & 0xffff);
+
if (regs->psw.mask & PSW_MASK_PSTATE) {
get_user(*((__u16 *) opcode), location);
switch (opcode[0]) {
@@ -544,6 +563,7 @@ asmlinkage void specification_exception(struct pt_regs *regs,
do_trap(pgm_int_code, signal,
"specification exception", regs, &info);
}
+ trace_trap_exit();
}
#else
DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
@@ -558,6 +578,8 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code,
location = get_psw_address(regs, pgm_int_code);
+ trace_trap_entry(regs, pgm_int_code & 0xffff);
+
if (MACHINE_HAS_IEEE)
asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
@@ -631,6 +653,7 @@ static void data_exception(struct pt_regs *regs, long pgm_int_code,
info.si_addr = location;
do_trap(pgm_int_code, signal, "data exception", regs, &info);
}
+ trace_trap_exit();
}
static void space_switch_exception(struct pt_regs *regs, long pgm_int_code,
@@ -638,6 +661,7 @@ static void space_switch_exception(struct pt_regs *regs, long pgm_int_code,
{
siginfo_t info;
+ trace_trap_entry(regs, pgm_int_code & 0xffff);
/* Set user psw back to home space mode. */
if (regs->psw.mask & PSW_MASK_PSTATE)
regs->psw.mask |= PSW_ASC_HOME;
@@ -647,6 +671,7 @@ static void space_switch_exception(struct pt_regs *regs, long pgm_int_code,
info.si_code = ILL_PRVOPC;
info.si_addr = get_psw_address(regs, pgm_int_code);
do_trap(pgm_int_code, SIGILL, "space switch event", regs, &info);
+ trace_trap_exit();
}
asmlinkage void __kprobes kernel_stack_overflow(struct pt_regs * regs)
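Note the shape of the instrumentation in do_trap() and illegal_op(): every early return between trace_trap_entry() and the end of the function becomes a goto to a label that fires trace_trap_exit(), so entry and exit events always stay paired. A userspace sketch of the same discipline (the stub trace functions are stand-ins):

    #include <stdio.h>

    static void trace_trap_entry(int code) { printf("trap_entry %d\n", code); }
    static void trace_trap_exit(void)      { printf("trap_exit\n"); }

    static void do_trap_sketch(int code, int fail_early)
    {
            trace_trap_entry(code);
            if (fail_early)
                    goto end;       /* was a bare "return" before instrumentation */
            /* ... handle the trap ... */
    end:
            trace_trap_exit();      /* fires on every exit path */
    }

    int main(void)
    {
            do_trap_sketch(1, 0);
            do_trap_sketch(2, 1);   /* the early exit still emits trap_exit */
            return 0;
    }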
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2c57806c085..f07b4d2cb53 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -5,6 +5,7 @@
* Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
* Author(s): Hartmut Penner (hp@de.ibm.com)
* Ulrich Weigand (uweigand@de.ibm.com)
+ * Portions added by T. Halloran: (C) Copyright 2002 IBM Poughkeepsie, IBM Corporation
*
* Derived from "arch/i386/mm/fault.c"
* Copyright (C) 1995 Linus Torvalds
@@ -31,6 +32,7 @@
#include <linux/kprobes.h>
#include <linux/uaccess.h>
#include <linux/hugetlb.h>
+#include <trace/fault.h>
#include <asm/asm-offsets.h>
#include <asm/system.h>
#include <asm/pgtable.h>
@@ -39,6 +41,11 @@
#include <asm/compat.h>
#include "../kernel/entry.h"
+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+DEFINE_TRACE(page_fault_nosem_entry);
+DEFINE_TRACE(page_fault_nosem_exit);
+
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
#define __SUBCODE_MASK 0x0200
@@ -272,7 +279,10 @@ static noinline void do_fault_error(struct pt_regs *regs, long int_code,
/* User mode accesses just cause a SIGSEGV */
si_code = (fault == VM_FAULT_BADMAP) ?
SEGV_MAPERR : SEGV_ACCERR;
+ trace_page_fault_nosem_entry(regs, int_code & 0xffff,
+ trans_exc_code);
do_sigsegv(regs, int_code, si_code, trans_exc_code);
+ trace_page_fault_nosem_exit();
return;
}
case VM_FAULT_BADCONTEXT:
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8a9011dced1..fb4ef6a78db 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -48,6 +48,7 @@ config SUPERH32
select PERF_EVENTS
select ARCH_HIBERNATION_POSSIBLE if MMU
select SPARSE_IRQ
+ select HAVE_LTT_DUMP_TABLES
config SUPERH64
def_bool ARCH = "sh64"
@@ -211,6 +212,8 @@ config CPU_SH4
select CPU_HAS_FPU if !CPU_SH4AL_DSP
select SYS_SUPPORTS_TMU
select SYS_SUPPORTS_HUGETLBFS if MMU
+ select HAVE_TRACE_CLOCK
+ select HAVE_TRACE_CLOCK_32_TO_64
config CPU_SH4A
bool
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index c228946926e..2d94b81add3 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -120,6 +120,7 @@ extern void init_thread_xstate(void);
#define TIF_SECCOMP 6 /* secure computing */
#define TIF_NOTIFY_RESUME 7 /* callback before returning to user */
#define TIF_SYSCALL_TRACEPOINT 8 /* for ftrace syscall instrumentation */
+#define TIF_KERNEL_TRACE 9 /* kernel trace active */
#define TIF_POLLING_NRFLAG 17 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
#define TIF_FREEZE 19 /* Freezing for suspend */
@@ -132,6 +133,7 @@ extern void init_thread_xstate(void);
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_FREEZE (1 << TIF_FREEZE)
@@ -144,17 +146,18 @@ extern void init_thread_xstate(void);
/* work to do in syscall trace */
#define _TIF_WORK_SYSCALL_MASK (_TIF_SYSCALL_TRACE | _TIF_SINGLESTEP | \
_TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE)
/* work to do on any return to u-space */
#define _TIF_ALLWORK_MASK (_TIF_SYSCALL_TRACE | _TIF_SIGPENDING | \
_TIF_NEED_RESCHED | _TIF_SYSCALL_AUDIT | \
_TIF_SINGLESTEP | _TIF_NOTIFY_RESUME | \
- _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK (_TIF_ALLWORK_MASK & ~(_TIF_SYSCALL_TRACE | \
- _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP))
+ _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
+ _TIF_KERNEL_TRACE))
/*
* Thread-synchronous status.
diff --git a/arch/sh/include/asm/timex.h b/arch/sh/include/asm/timex.h
index 18bf06d9c76..5249bee819c 100644
--- a/arch/sh/include/asm/timex.h
+++ b/arch/sh/include/asm/timex.h
@@ -12,6 +12,8 @@
* can be used for accurately setting CLOCK_TICK_RATE, otherwise we
* simply fall back on the i8253 PIT value.
*/
+
+#if 0
#ifdef CONFIG_SH_PCLK_FREQ
#define CLOCK_TICK_RATE (CONFIG_SH_PCLK_FREQ / 4) /* Underlying HZ */
#else
@@ -19,5 +21,18 @@
#endif
#include <asm-generic/timex.h>
+#endif /* 0 */
+
+#include <linux/io.h>
+#include <cpu/timer.h>
+
+#define CLOCK_TICK_RATE (HZ * 100000UL)
+
+typedef unsigned long long cycles_t;
+
+static inline cycles_t get_cycles(void)
+{
+ return 0xffffffff - ctrl_inl(TMU1_TCNT);
+}
#endif /* __ASM_SH_TIMEX_H */
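The TMU channel read above is a down-counter, so get_cycles() inverts TCNT to obtain a value that grows with time. A userspace sketch of the inversion (the stored value is a made-up stand-in for ctrl_inl(TMU1_TCNT)):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t tmu1_tcnt = 0xfffffff0u;        /* hypothetical TCNT snapshot */

    static uint64_t get_cycles_sketch(void)
    {
            /* the TMU counts down from 0xffffffff; inverting gives an up-count */
            return 0xffffffffu - tmu1_tcnt;
    }

    int main(void)
    {
            printf("%llu\n", (unsigned long long)get_cycles_sketch());  /* 15 */
            return 0;
    }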
diff --git a/arch/sh/include/asm/trace-clock.h b/arch/sh/include/asm/trace-clock.h
new file mode 100644
index 00000000000..152d54c4181
--- /dev/null
+++ b/arch/sh/include/asm/trace-clock.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2007,2008 Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Trace clock definitions for SuperH.
+ */
+
+#ifndef _ASM_SH_TRACE_CLOCK_H
+#define _ASM_SH_TRACE_CLOCK_H
+
+#include <linux/clocksource.h>
+#include <asm/clock.h>
+
+/*
+ * Number of hardware clock bits. The higher-order bits are expected to be 0.
+ * If the hardware clock source has more than 32 bits, the bits above the
+ * 32nd are truncated by a cast to a 32-bit unsigned integer. Range: 1 - 32.
+ * (Too few bits would be unrealistic anyway, since we depend on the timer to
+ * detect the overflows.)
+ */
+#define TC_HW_BITS 32
+
+/* Expected maximum interrupt latency: 15 ms, doubled as a safety margin */
+#define TC_EXPECTED_INTERRUPT_LATENCY 30
+
+extern u64 trace_clock_read_synthetic_tsc(void);
+extern u64 sh_get_clock_frequency(void);
+extern u32 sh_read_timer_count(void);
+extern void get_synthetic_tsc(void);
+extern void put_synthetic_tsc(void);
+
+static inline u32 trace_clock_read32(void)
+{
+ return sh_read_timer_count();
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return trace_clock_read_synthetic_tsc();
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return sh_get_clock_frequency();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+static inline int get_trace_clock(void)
+{
+ get_synthetic_tsc();
+ return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+ put_synthetic_tsc();
+}
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_SH_TRACE_CLOCK_H */
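trace_clock_read64() above relies on trace_clock_read_synthetic_tsc(), which extends the 32-bit hardware count (TC_HW_BITS) to 64 bits in software; that code is not part of this hunk. A simplified single-reader sketch of the extension, assuming a periodic timer guarantees at most one wrap between reads (the real LTTng synthetic TSC is per-CPU and copes with concurrent readers):

    #include <stdint.h>

    static uint64_t synthetic_tsc;  /* upper bits: wrap count, low 32: hw value */
    static uint32_t hw_count;       /* stand-in for sh_read_timer_count() */

    static uint64_t synthetic_tsc_read(void)
    {
            uint32_t hw = hw_count;

            if (hw < (uint32_t)synthetic_tsc)       /* 32-bit counter wrapped */
                    synthetic_tsc += 1ULL << 32;
            synthetic_tsc = (synthetic_tsc & ~0xffffffffULL) | hw;
            return synthetic_tsc;
    }

    int main(void)
    {
            hw_count = 0xfffffff0u;
            synthetic_tsc_read();
            hw_count = 0x00000010u;                 /* wrapped since last read */
            return synthetic_tsc_read() == 0x100000010ULL ? 0 : 1;
    }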
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 77f7ae1d464..fcb0da93c42 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -47,5 +47,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_callchain.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
ccflags-y := -Werror
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 762a13984bb..ddffe37d968 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -21,12 +21,15 @@
#include <linux/fs.h>
#include <linux/ftrace.h>
#include <linux/hw_breakpoint.h>
+#include <trace/sched.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/fpu.h>
#include <asm/syscalls.h>
+DEFINE_TRACE(sched_kthread_create);
+
void show_regs(struct pt_regs * regs)
{
printk("\n");
@@ -94,6 +97,8 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
&regs, 0, NULL, NULL);
+ trace_sched_kthread_create(fn, pid);
+
return pid;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 210c1cabcb7..4b17fd9ed79 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -25,12 +25,15 @@
#include <linux/init.h>
#include <linux/module.h>
#include <linux/io.h>
+#include <trace/sched.h>
#include <asm/syscalls.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/fpu.h>
+DEFINE_TRACE(sched_kthread_create);
+
struct task_struct *last_task_used_math = NULL;
void show_regs(struct pt_regs *regs)
@@ -300,6 +303,7 @@ ATTRIB_NORET void kernel_thread_helper(void *arg, int (*fn)(void *))
*/
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
{
+ int pid;
struct pt_regs regs;
memset(&regs, 0, sizeof(regs));
@@ -310,8 +314,12 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
regs.sr = (1 << 30);
/* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
+ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
&regs, 0, NULL, NULL);
+
+ trace_sched_kthread_create(fn, pid);
+
+ return pid;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 90a15d29fee..0373238e0d5 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -26,6 +26,10 @@
#include <linux/elf.h>
#include <linux/regset.h>
#include <linux/hw_breakpoint.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/marker.h>
+#include <trace/syscall.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -33,10 +37,34 @@
#include <asm/mmu_context.h>
#include <asm/syscalls.h>
#include <asm/fpu.h>
+#include <asm/unistd.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
+extern unsigned long sys_call_table[];
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < NR_syscalls; i++) {
+ sprint_symbol(namebuf, sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i, (void *)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+
+void ltt_dump_idt_table(void *call_data)
+{
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
/*
* This routine will get a word off of the process kernel stack.
*/
@@ -491,6 +519,8 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
{
long ret = 0;
+ trace_syscall_entry(regs, regs->regs[3]);
+
secure_computing(regs->regs[0]);
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
@@ -517,6 +547,8 @@ asmlinkage void do_syscall_trace_leave(struct pt_regs *regs)
{
int step;
+ trace_syscall_exit(regs->regs[0]);
+
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->regs[0]),
regs->regs[0]);
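ltt_dump_sys_call_table() above walks the table once and emits one marker per entry with the "id %d address %p symbol %s" format. A userspace simulation of that loop, with fake handlers standing in for sys_call_table and hard-coded names standing in for sprint_symbol():

    #include <stdio.h>

    static long fake_sys_read(void)  { return 0; }
    static long fake_sys_write(void) { return 0; }

    struct entry { long (*fn)(void); const char *name; };

    static const struct entry table[] = {           /* stand-in sys_call_table */
            { fake_sys_read,  "fake_sys_read"  },
            { fake_sys_write, "fake_sys_write" },
    };

    int main(void)
    {
            unsigned int i;

            /* the function-pointer-to-void* cast mirrors the kernel code above */
            for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
                    printf("id %u address %p symbol %s\n",
                           i, (void *)table[i].fn, table[i].name);
            return 0;
    }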
diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c
index 4436eacddb1..c893d20483b 100644
--- a/arch/sh/kernel/ptrace_64.c
+++ b/arch/sh/kernel/ptrace_64.c
@@ -31,6 +31,7 @@
#include <linux/tracehook.h>
#include <linux/elf.h>
#include <linux/regset.h>
+#include <trace/syscall.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -43,6 +44,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
/* This mask defines the bits of the SR which the user is not allowed to
change, which are everything except S, Q, M, PR, SZ, FR. */
#define SR_MASK (0xffff8cfd)
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index 8c6a350df75..b519b22b575 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -27,6 +27,9 @@
#include <asm/unistd.h>
#include <asm/cacheflush.h>
#include <asm/cachectl.h>
+#include <trace/ipc.h>
+
+DEFINE_TRACE(ipc_call);
asmlinkage int old_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
diff --git a/arch/sh/kernel/trace-clock.c b/arch/sh/kernel/trace-clock.c
new file mode 100644
index 00000000000..0c5509b9667
--- /dev/null
+++ b/arch/sh/kernel/trace-clock.c
@@ -0,0 +1,55 @@
+/*
+ * arch/sh/kernel/trace-clock.c
+ *
+ * Trace clock for SuperH.
+ *
+ * Copyright (C) 2010 STMicroelectronics Ltd
+ *
+ * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
+ *
+ * Note: currently only tested and supported on SH4 CPUs
+ * (TODO: tests on other SuperH architectures).
+ */
+
+#include <linux/module.h>
+#include <linux/clocksource.h>
+#include <asm/clock.h>
+
+static struct clocksource *clksrc;
+
+/* For the TMU on SH4 architectures, this returns the value of
+ * the timer counter register (TCNT). */
+u32 sh_read_timer_count(void)
+{
+ u32 value = 0;
+
+ if (likely(clksrc))
+ value = (u32) clksrc->read(clksrc);
+
+ return value;
+}
+
+/* Get the clock rate for the timer (e.g. TMU for SH4) */
+u64 sh_get_clock_frequency(void)
+{
+ u64 rate = 0;
+ struct clk *clk;
+
+ clk = clk_get(NULL, "module_clk");
+ if (likely(clk))
+ rate = clk_get_rate(clk) / 4;
+
+ return rate;
+}
+
+/* Get the clock source used to read the timer counter,
+ * e.g. a TMU channel on SH4 architectures. */
+static __init int init_sh_clocksource(void)
+{
+ clksrc = clocksource_get_next();
+ if (unlikely(!clksrc))
+ pr_err("%s: no clocksource found\n", __func__);
+
+ return 0;
+}
+early_initcall(init_sh_clocksource);
diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c
index 3484c2f65ab..5abd87752eb 100644
--- a/arch/sh/kernel/traps_32.c
+++ b/arch/sh/kernel/traps_32.c
@@ -27,6 +27,7 @@
#include <linux/sysfs.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>
+#include <trace/trap.h>
#include <asm/system.h>
#include <asm/alignment.h>
#include <asm/fpu.h>
@@ -47,6 +48,9 @@
#define TRAP_ILLEGAL_SLOT_INST 13
#endif
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
static void dump_mem(const char *str, unsigned long bottom, unsigned long top)
{
unsigned long p;
@@ -545,6 +549,8 @@ asmlinkage void do_address_error(struct pt_regs *regs,
error_code = lookup_exception_vector();
#endif
+ trace_trap_entry(regs, error_code >> 5);
+
oldfs = get_fs();
if (user_mode(regs)) {
@@ -589,8 +595,10 @@ fixup:
address);
set_fs(oldfs);
- if (tmp == 0)
+ if (!tmp) {
+ trace_trap_exit();
return; /* sorted */
+ }
uspace_segv:
printk(KERN_NOTICE "Sending SIGBUS to \"%s\" due to unaligned "
"access (PC %lx PR %lx)\n", current->comm, regs->pc,
@@ -623,6 +631,7 @@ uspace_segv:
0, address);
set_fs(oldfs);
}
+ trace_trap_exit();
}
#ifdef CONFIG_SH_DSP
diff --git a/arch/sh/mm/fault_32.c b/arch/sh/mm/fault_32.c
index d4c34d757f0..22695763403 100644
--- a/arch/sh/mm/fault_32.c
+++ b/arch/sh/mm/fault_32.c
@@ -16,11 +16,17 @@
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
+#include <trace/fault.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+DEFINE_TRACE(page_fault_nosem_entry);
+DEFINE_TRACE(page_fault_nosem_exit);
+
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
int ret = 0;
@@ -200,7 +206,14 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault.
*/
+ trace_page_fault_entry(regs,
+ ({
+ unsigned long trapnr;
+ asm volatile("stc r2_bank,%0": "=r" (trapnr));
+ trapnr;
+ }) >> 5, mm, vma, address, writeaccess);
fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
+ trace_page_fault_exit(fault);
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM)
goto out_of_memory;
@@ -230,11 +243,18 @@ bad_area:
bad_area_nosemaphore:
if (user_mode(regs)) {
+ trace_page_fault_nosem_entry(regs,
+ ({
+ unsigned long trapnr;
+ asm volatile("stc r2_bank,%0": "=r" (trapnr));
+ trapnr;
+ }) >> 5, address);
info.si_signo = SIGSEGV;
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void *) address;
force_sig_info(SIGSEGV, &info, tsk);
+ trace_page_fault_nosem_exit();
return;
}
@@ -324,6 +344,11 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
pmd_t *pmd;
pte_t *pte;
pte_t entry;
+ int ret;
+ int irqvec;
+
+ irqvec = lookup_exception_vector();
+ trace_page_fault_nosem_entry(regs, irqvec, address);
/*
* We don't take page faults for P1, P2, and parts of P4, these
@@ -333,24 +358,34 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
if (address >= P3SEG && address < P3_ADDR_MAX) {
pgd = pgd_offset_k(address);
} else {
- if (unlikely(address >= TASK_SIZE || !current->mm))
- return 1;
+ if (unlikely(address >= TASK_SIZE || !current->mm)) {
+ ret = 1;
+ goto out;
+ }
pgd = pgd_offset(current->mm, address);
}
pud = pud_offset(pgd, address);
- if (pud_none_or_clear_bad(pud))
- return 1;
+ if (pud_none_or_clear_bad(pud)) {
+ ret = 1;
+ goto out;
+ }
pmd = pmd_offset(pud, address);
- if (pmd_none_or_clear_bad(pmd))
- return 1;
+ if (pmd_none_or_clear_bad(pmd)) {
+ ret = 1;
+ goto out;
+ }
pte = pte_offset_kernel(pmd, address);
entry = *pte;
- if (unlikely(pte_none(entry) || pte_not_present(entry)))
- return 1;
- if (unlikely(writeaccess && !pte_write(entry)))
- return 1;
+ if (unlikely(pte_none(entry) || pte_not_present(entry))) {
+ ret = 1;
+ goto out;
+ }
+ if (unlikely(writeaccess && !pte_write(entry))) {
+ ret = 1;
+ goto out;
+ }
if (writeaccess)
entry = pte_mkdirty(entry);
@@ -370,5 +405,8 @@ handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
update_mmu_cache(NULL, address, pte);
- return 0;
+ ret = 0;
+out:
+ trace_page_fault_nosem_exit();
+ return ret;
}
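The trap number passed to trace_page_fault_entry() above is fetched inline with a GNU statement expression: a ({ ... }) block evaluates to its final expression, so the r2_bank read can be shifted and passed as an argument in one step. A compact gcc-only illustration (the 0x1a0 value is made up):

    #include <stdio.h>

    int main(void)
    {
            /* ({ ... }) evaluates to the value of its last expression */
            int trapnr = ({ int t = 0x1a0; t; }) >> 5;

            printf("%d\n", trapnr);         /* prints 13 */
            return 0;
    }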
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 95695e97703..76f95707b97 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -40,6 +40,7 @@ config SPARC64
select HAVE_KPROBES
select HAVE_MEMBLOCK
select HAVE_SYSCALL_WRAPPERS
+ select HAVE_GET_CYCLES
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_SYSCALL_TRACEPOINTS
@@ -50,6 +51,7 @@ config SPARC64
select RTC_DRV_STARFIRE
select HAVE_PERF_EVENTS
select PERF_USE_VMALLOC
+ select HAVE_TRACE_CLOCK
select HAVE_GENERIC_HARDIRQS
config ARCH_DEFCONFIG
diff --git a/arch/sparc/include/asm/thread_info_32.h b/arch/sparc/include/asm/thread_info_32.h
index 9dd0318d3dd..65489902244 100644
--- a/arch/sparc/include/asm/thread_info_32.h
+++ b/arch/sparc/include/asm/thread_info_32.h
@@ -128,6 +128,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
+#define TIF_KERNEL_TRACE 5 /* kernel trace active */
#define TIF_USEDFPU 8 /* FPU was used by this task
* this quantum (SMP) */
#define TIF_POLLING_NRFLAG 9 /* true if poll_idle() is polling
@@ -137,6 +138,7 @@ BTFIXUPDEF_CALL(void, free_thread_info, struct thread_info *)
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index fb2ea7705a4..9de58956ace 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -214,7 +214,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
#define TIF_UNALIGNED 5 /* allowed to do unaligned accesses */
/* flag bit 6 is available */
#define TIF_32BIT 7 /* 32-bit binary */
-/* flag bit 8 is available */
+#define TIF_KERNEL_TRACE 8 /* kernel trace active */
#define TIF_SECCOMP 9 /* secure computing */
#define TIF_SYSCALL_AUDIT 10 /* syscall auditing active */
#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
@@ -233,6 +233,7 @@ register struct thread_info *current_thread_info_reg asm("g6");
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_UNALIGNED (1<<TIF_UNALIGNED)
#define _TIF_32BIT (1<<TIF_32BIT)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
diff --git a/arch/sparc/include/asm/timex_64.h b/arch/sparc/include/asm/timex_64.h
index 18b30bc9823..905443a8889 100644
--- a/arch/sparc/include/asm/timex_64.h
+++ b/arch/sparc/include/asm/timex_64.h
@@ -12,7 +12,24 @@
/* Getting on the cycle counter on sparc64. */
typedef unsigned long cycles_t;
-#define get_cycles() tick_ops->get_tick()
+
+static inline cycles_t get_cycles(void)
+{
+ return tick_ops->get_tick();
+}
+
+/* The tick read in get_cycles() is self-synchronizing on sparc64; no barrier needed */
+static inline void get_cycles_barrier(void)
+{
+ return;
+}
+
+extern unsigned long tb_ticks_per_usec;
+
+static inline cycles_t get_cycles_rate(void)
+{
+ return (cycles_t)tb_ticks_per_usec * 1000000UL;
+}
#define ARCH_HAS_READ_CURRENT_TIMER
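With tb_ticks_per_usec exported (see the time_64.c hunk below), get_cycles_rate() yields the timebase frequency in ticks per second, which is what a tracer needs to turn cycle deltas into wall time. A hypothetical conversion, assuming a 400 MHz timebase:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t tb_ticks_per_usec = 400;               /* hypothetical: 400 MHz */
            uint64_t rate = tb_ticks_per_usec * 1000000ULL; /* get_cycles_rate() */
            uint64_t delta = 2000;                          /* measured cycle delta */

            /* large deltas can overflow; kernel code would use the div64 helpers */
            printf("%llu ns\n", (unsigned long long)(delta * 1000000000ULL / rate));
            return 0;                                       /* prints "5000 ns" */
    }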
diff --git a/arch/sparc/include/asm/trace-clock.h b/arch/sparc/include/asm/trace-clock.h
new file mode 100644
index 00000000000..306fdf7b7ba
--- /dev/null
+++ b/arch/sparc/include/asm/trace-clock.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2008, Mathieu Desnoyers
+ *
+ * Trace clock definitions for Sparc64.
+ */
+
+#ifndef _ASM_SPARC_TRACE_CLOCK_H
+#define _ASM_SPARC_TRACE_CLOCK_H
+
+#include <linux/timex.h>
+
+static inline u32 trace_clock_read32(void)
+{
+ return get_cycles();
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return get_cycles();
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return get_cycles_rate();
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+static inline int get_trace_clock(void)
+{
+ return 0;
+}
+
+static inline void put_trace_clock(void)
+{
+}
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_SPARC_TRACE_CLOCK_H */
diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S
index 1504df8ddf7..54210d48dba 100644
--- a/arch/sparc/kernel/entry.S
+++ b/arch/sparc/kernel/entry.S
@@ -1151,7 +1151,7 @@ sys_sigreturn:
add %sp, STACKFRAME_SZ, %o0
ld [%curptr + TI_FLAGS], %l5
- andcc %l5, _TIF_SYSCALL_TRACE, %g0
+ andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0
be 1f
nop
@@ -1171,7 +1171,7 @@ sys_rt_sigreturn:
add %sp, STACKFRAME_SZ, %o0
ld [%curptr + TI_FLAGS], %l5
- andcc %l5, _TIF_SYSCALL_TRACE, %g0
+ andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0
be 1f
nop
@@ -1313,7 +1313,7 @@ syscall_is_too_hard:
ld [%curptr + TI_FLAGS], %l5
mov %i3, %o3
- andcc %l5, _TIF_SYSCALL_TRACE, %g0
+ andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0
mov %i4, %o4
bne linux_syscall_trace
mov %i0, %l5
@@ -1330,7 +1330,7 @@ ret_sys_call:
ld [%sp + STACKFRAME_SZ + PT_PSR], %g3
set PSR_C, %g2
bgeu 1f
- andcc %l6, _TIF_SYSCALL_TRACE, %g0
+ andcc %l6, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0
/* System call success, clear Carry condition code. */
andn %g3, %g2, %g3
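Each andcc above folds _TIF_KERNEL_TRACE into the same test as _TIF_SYSCALL_TRACE, so setting either flag diverts the syscall onto the slow, traced path. The C-level equivalent of that test, using the sparc32 bit numbers from thread_info_32.h:

    #include <stdio.h>

    #define TIF_SYSCALL_TRACE  0
    #define TIF_KERNEL_TRACE   5
    #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
    #define _TIF_KERNEL_TRACE  (1 << TIF_KERNEL_TRACE)

    int main(void)
    {
            unsigned long ti_flags = _TIF_KERNEL_TRACE;     /* tracer attached */

            /* mirrors: andcc %l5, (_TIF_SYSCALL_TRACE|_TIF_KERNEL_TRACE), %g0 */
            if (ti_flags & (_TIF_SYSCALL_TRACE | _TIF_KERNEL_TRACE))
                    printf("take the linux_syscall_trace slow path\n");
            return 0;
    }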
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 17529298c50..57c0d67b5c4 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -24,6 +24,7 @@
#include <linux/pm.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <trace/sched.h>
#include <asm/auxio.h>
#include <asm/oplib.h>
@@ -39,6 +40,8 @@
#include <asm/prom.h>
#include <asm/unistd.h>
+DEFINE_TRACE(sched_kthread_create);
+
/*
* Power management idle function
* Set in pm platform drivers (apc.c and pmc.c)
@@ -674,6 +677,7 @@ pid_t kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
"i" (__NR_clone), "r" (flags | CLONE_VM | CLONE_UNTRACED),
"i" (__NR_exit), "r" (fn), "r" (arg) :
"g1", "g2", "g3", "o0", "o1", "memory", "cc");
+ trace_sched_kthread_create(fn, retval);
return retval;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c
index 3bc9c9979b9..d57935a0245 100644
--- a/arch/sparc/kernel/time_64.c
+++ b/arch/sparc/kernel/time_64.c
@@ -782,7 +782,8 @@ static struct clocksource clocksource_tick = {
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
-static unsigned long tb_ticks_per_usec __read_mostly;
+unsigned long tb_ticks_per_usec __read_mostly;
+EXPORT_SYMBOL_GPL(tb_ticks_per_usec);
void __delay(unsigned long loops)
{
diff --git a/arch/um/include/asm/thread_info.h b/arch/um/include/asm/thread_info.h
index e2cf786bda0..510bf5ba964 100644
--- a/arch/um/include/asm/thread_info.h
+++ b/arch/um/include/asm/thread_info.h
@@ -68,6 +68,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
#define TIF_SYSCALL_AUDIT 6
#define TIF_RESTORE_SIGMASK 7
+#define TIF_KERNEL_TRACE 8 /* kernel trace active */
#define TIF_FREEZE 16 /* is freezing for suspend */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
@@ -77,6 +78,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_FREEZE (1 << TIF_FREEZE)
#endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d5ed94d30aa..b0389519b6d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -19,6 +19,7 @@ config X86
select HAVE_READQ
select HAVE_WRITEQ
select HAVE_UNSTABLE_SCHED_CLOCK
+ select HAVE_GET_CYCLES
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_PERF_EVENTS
@@ -27,9 +28,11 @@ config X86
select HAVE_KPROBES
select HAVE_MEMBLOCK
select ARCH_WANT_OPTIONAL_GPIOLIB
+ select HAVE_LTT_DUMP_TABLES
select ARCH_WANT_FRAME_POINTERS
select HAVE_DMA_ATTRS
select HAVE_KRETPROBES
+ select HAVE_TRACE_CLOCK
select HAVE_OPTPROBES
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_C_RECORDMCOUNT
@@ -208,10 +211,12 @@ config HAVE_INTEL_TXT
config X86_32_SMP
def_bool y
depends on X86_32 && SMP
+ select HAVE_UNSYNCHRONIZED_TSC
config X86_64_SMP
def_bool y
depends on X86_64 && SMP
+ select HAVE_UNSYNCHRONIZED_TSC
config X86_HT
def_bool y
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index 29cdcd02ead..accd6b42bd2 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,8 +8,11 @@
#include <linux/shm.h>
#include <linux/ipc.h>
#include <linux/compat.h>
+#include <trace/ipc.h>
#include <asm/sys_ia32.h>
+DEFINE_TRACE(ipc_call);
+
asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
compat_uptr_t ptr, u32 fifth)
{
@@ -18,6 +21,8 @@ asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
+ trace_ipc_call(call, first);
+
switch (call) {
case SEMOP:
/* struct sembuf is the same on 32 and 64bit :)) */
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h
index 38d87379e27..9b1db108f9e 100644
--- a/arch/x86/include/asm/idle.h
+++ b/arch/x86/include/asm/idle.h
@@ -1,20 +1,9 @@
#ifndef _ASM_X86_IDLE_H
#define _ASM_X86_IDLE_H
-#define IDLE_START 1
-#define IDLE_END 2
-
-struct notifier_block;
-void idle_notifier_register(struct notifier_block *n);
-void idle_notifier_unregister(struct notifier_block *n);
-
-#ifdef CONFIG_X86_64
-void enter_idle(void);
-void exit_idle(void);
-#else /* !CONFIG_X86_64 */
-static inline void enter_idle(void) { }
-static inline void exit_idle(void) { }
-#endif /* CONFIG_X86_64 */
+extern void enter_idle(void);
+extern void __exit_idle(void);
+extern void exit_idle(void);
void c1e_remove_cpu(int cpu);
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 5745ce8bf10..fdf897373e1 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -56,6 +56,61 @@ static inline void native_halt(void)
#endif
+#ifdef CONFIG_X86_64
+/*
+ * Only returns from a trap or exception to an NMI context (intra-privilege
+ * level near return) with the same SS and CS segments. Should be used
+ * upon trap or exception return when nested over an NMI context, so that no
+ * iret is issued. It takes care of modifying EFLAGS and RSP and of returning
+ * to the previous function.
+ *
+ * The stack, at that point, looks like this:
+ *
+ * 0(rsp)  RIP
+ * 8(rsp)  CS
+ * 16(rsp) EFLAGS
+ * 24(rsp) RSP
+ * 32(rsp) SS
+ *
+ * Upon execution:
+ * Copy RIP to the top of the return stack
+ * Update the top-of-return-stack address
+ * Pop EFLAGS into the EFLAGS register
+ * Make the return stack current
+ * Near return (popping the return address from the return stack)
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushq %rax; \
+ movq %rsp, %rax; \
+ movq 24+8(%rax), %rsp; \
+ pushq 0+8(%rax); \
+ pushq 16+8(%rax); \
+ movq (%rax), %rax; \
+ popfq; \
+ ret
+#else
+/*
+ * Protected mode only, no V8086. Implies that protected mode must
+ * be entered before NMIs or MCEs are enabled. Only returns from a trap or
+ * exception to an NMI context (intra-privilege level far return). Should be
+ * used upon trap or exception return when nested over an NMI context, so
+ * that no iret is issued.
+ *
+ * The stack, at that point, looks like this:
+ *
+ * 0(esp) EIP
+ * 4(esp) CS
+ * 8(esp) EFLAGS
+ *
+ * Upon execution:
+ * Copy the stacked EFLAGS to the top of the stack
+ * Pop EFLAGS into the EFLAGS register
+ * Far return: pop EIP and CS into their registers, and additionally pop EFLAGS.
+ */
+#define NATIVE_INTERRUPT_RETURN_NMI_SAFE pushl 8(%esp); \
+ popfl; \
+ lret $4
+#endif
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -112,6 +167,7 @@ static inline unsigned long arch_local_irq_save(void)
#define ENABLE_INTERRUPTS(x) sti
#define DISABLE_INTERRUPTS(x) cli
+#define INTERRUPT_RETURN_NMI_SAFE NATIVE_INTERRUPT_RETURN_NMI_SAFE
#ifdef CONFIG_X86_64
#define SWAPGS swapgs
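NATIVE_INTERRUPT_RETURN_NMI_SAFE only matters when a trap or exception returns into an interrupted NMI handler, because a plain iret would re-enable NMIs prematurely. The entry code detects that case by testing preempt_count against NMI_MASK (0x04000000, bit 26, which matches the stock 2.6.38 hardirq.h layout of 8 PREEMPT + 8 SOFTIRQ + 10 HARDIRQ bits followed by the NMI bit). A sketch of the decision:

    #include <stdio.h>

    #define NMI_SHIFT 26                    /* above PREEMPT/SOFTIRQ/HARDIRQ bits */
    #define NMI_MASK  (1UL << NMI_SHIFT)    /* 0x04000000, as in entry_*.S */

    int main(void)
    {
            unsigned long preempt_count = NMI_MASK | 1;     /* nested over an NMI */

            if (preempt_count & NMI_MASK)
                    printf("return via popf + ret/lret, not iret\n");
            else
                    printf("normal iret return\n");
            return 0;
    }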
diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/include/asm/kvm-mmutrace.h
index b60b4fdb3ed..42d117d0418 100644
--- a/arch/x86/kvm/mmutrace.h
+++ b/arch/x86/include/asm/kvm-mmutrace.h
@@ -217,9 +217,9 @@ TRACE_EVENT(
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH asm
#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE mmutrace
+#define TRACE_INCLUDE_FILE kvm-mmutrace
/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/trace.h b/arch/x86/include/asm/kvm-trace.h
index 1357d7cf4ec..c1e151c092b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/include/asm/kvm-trace.h
@@ -701,9 +701,9 @@ TRACE_EVENT(kvm_emulate_insn,
#endif /* _TRACE_KVM_H */
#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH arch/x86/kvm
+#define TRACE_INCLUDE_PATH asm
#undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_FILE trace
+#define TRACE_INCLUDE_FILE kvm-trace
/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ebbc4d8ab17..1ef6906c179 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -962,6 +962,10 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_iret), CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_iret))
+#define INTERRUPT_RETURN_NMI_SAFE \
+ PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_nmi_return), CLBR_NONE, \
+ jmp *%cs:pv_cpu_ops+PV_CPU_nmi_return)
+
#define DISABLE_INTERRUPTS(clobbers) \
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 82885099c86..3e0634cc127 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -181,6 +181,7 @@ struct pv_cpu_ops {
/* Normal iret. Jump to this with the standard iret stack
frame set up. */
void (*iret)(void);
+ void (*nmi_return)(void);
void (*swapgs)(void);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f0b6e5dbc5a..58a37ae7565 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -82,6 +82,7 @@ struct thread_info {
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_KERNEL_TRACE 9 /* kernel trace active */
#define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
@@ -105,6 +106,7 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_KERNEL_TRACE (1 << TIF_KERNEL_TRACE)
#define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_NOTSC (1 << TIF_NOTSC)
@@ -121,18 +123,19 @@ struct thread_info {
/* work to do in syscall_trace_enter() */
#define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
+ _TIF_KERNEL_TRACE)
/* work to do in syscall_trace_leave() */
#define _TIF_WORK_SYSCALL_EXIT \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
- _TIF_SYSCALL_TRACEPOINT)
+ _TIF_SYSCALL_TRACEPOINT | _TIF_KERNEL_TRACE)
/* work to do on interrupt/exception return */
#define _TIF_WORK_MASK \
(0x0000FFFF & \
~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
- _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
+ _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU|_TIF_KERNEL_TRACE))
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
diff --git a/arch/x86/include/asm/trace-clock.h b/arch/x86/include/asm/trace-clock.h
new file mode 100644
index 00000000000..8ca73323366
--- /dev/null
+++ b/arch/x86/include/asm/trace-clock.h
@@ -0,0 +1,73 @@
+#ifndef _ASM_X86_TRACE_CLOCK_H
+#define _ASM_X86_TRACE_CLOCK_H
+
+/*
+ * linux/arch/x86/include/asm/trace-clock.h
+ *
+ * Copyright (C) 2005,2006,2008
+ * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Trace clock definitions for x86.
+ */
+
+#include <linux/timex.h>
+#include <linux/time.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+#include <asm/atomic.h>
+
+/* Minimum duration of a probe, in cycles */
+#define TRACE_CLOCK_MIN_PROBE_DURATION 200
+#define TRACE_CLOCK_RES TRACE_CLOCK_MIN_PROBE_DURATION
+
+union lttng_timespec {
+ struct timespec ts;
+ u64 lttng_ts;
+};
+
+extern cycles_t trace_clock_async_tsc_read(void);
+
+extern int _trace_clock_is_sync;
+static inline int trace_clock_is_sync(void)
+{
+ return _trace_clock_is_sync;
+}
+
+static inline u32 trace_clock_read32(void)
+{
+ u32 cycles;
+
+ if (likely(trace_clock_is_sync()))
+ cycles = (u32)get_cycles(); /* only need the 32 LSB */
+ else
+ cycles = (u32)trace_clock_async_tsc_read();
+ return cycles;
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ u64 cycles;
+
+ if (likely(trace_clock_is_sync()))
+ cycles = get_cycles();
+ else
+ cycles = trace_clock_async_tsc_read();
+ return cycles;
+}
+
+static inline u64 trace_clock_frequency(void)
+{
+ return (u64)cpu_khz * 1000;
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+extern int get_trace_clock(void);
+extern void put_trace_clock(void);
+
+extern void set_trace_clock_is_sync(int state);
+
+#endif /* _ASM_X86_TRACE_CLOCK_H */
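A hypothetical consumer of this header, showing the intended get/read/put discipline (time_region() and its callback are illustrative, not an API in this tree):

    #include <linux/types.h>
    #include <linux/trace-clock.h>

    /* Pin the clock source, sample around a region, release it. */
    static u64 time_region(void (*fn)(void))
    {
            u64 t0, t1;

            if (get_trace_clock())          /* non-zero return means unavailable */
                    return 0;
            t0 = trace_clock_read64();
            fn();
            t1 = trace_clock_read64();
            put_trace_clock();
            return t1 - t0;  /* cycles; scale by trace_clock_frequency() for time */
    }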
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 1ca132fc0d0..28e56e1ec3c 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -51,6 +51,18 @@ extern int unsynchronized_tsc(void);
extern int check_tsc_unstable(void);
extern unsigned long native_calibrate_tsc(void);
+static inline cycles_t get_cycles_rate(void)
+{
+ if (check_tsc_unstable())
+ return 0;
+ return (cycles_t)tsc_khz * 1000;
+}
+
+static inline void get_cycles_barrier(void)
+{
+ rdtsc_barrier();
+}
+
/*
* Boot-time check whether the TSCs are synchronized across
* all CPUs/cores:
@@ -62,4 +74,10 @@ extern int notsc_setup(char *);
extern void save_sched_clock_state(void);
extern void restore_sched_clock_state(void);
+extern int test_tsc_synchronization(void);
+extern int _tsc_is_sync;
+static inline int tsc_is_sync(void)
+{
+ return _tsc_is_sync;
+}
#endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 3d61e204826..06abe8f409a 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -12,6 +12,7 @@ struct vsyscall_gtod_data {
u32 wall_time_nsec;
int sysctl_enabled;
+ int trace_clock_is_sync;
struct timezone sys_tz;
struct { /* extract of a clocksource struct */
cycle_t (*vread)(void);
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index d0983d255fb..47b80f3ba4d 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -39,6 +39,14 @@ extern struct timezone sys_tz;
extern void map_vsyscall(void);
+#ifdef CONFIG_X86_64
+extern void update_trace_clock_is_sync_vdso(void);
+#else
+static inline void update_trace_clock_is_sync_vdso(void)
+{
+}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 34244b2cd88..717cf9c620b 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -46,6 +46,7 @@ obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-y += pci-iommu_table.o
obj-y += resource.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK) += trace-clock.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o
@@ -66,9 +67,8 @@ obj-$(CONFIG_PCI) += early-quirks.o
apm-y := apm_32.o
obj-$(CONFIG_APM) += apm.o
obj-$(CONFIG_SMP) += smp.o
-obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
+obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += setup_percpu.o
-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
obj-$(CONFIG_X86_MPPARSE) += mpparse.o
obj-y += apic/
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 76b96d74978..c604d23b4f3 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -33,6 +33,7 @@
#include <linux/dmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
+#include <trace/irq.h>
#include <asm/perf_event.h>
#include <asm/x86_init.h>
@@ -868,7 +869,9 @@ void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
*/
exit_idle();
irq_enter();
+ trace_irq_entry(LOCAL_TIMER_VECTOR, regs, NULL);
local_apic_timer_interrupt();
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
set_irq_regs(old_regs);
@@ -1788,6 +1791,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
exit_idle();
irq_enter();
+ trace_irq_entry(SPURIOUS_APIC_VECTOR, NULL, NULL);
/*
* Check if this really is a spurious interrupt and ACK it
* if it is a vectored one. Just in case...
@@ -1802,6 +1806,7 @@ void smp_spurious_interrupt(struct pt_regs *regs)
/* see sw-dev-man vol 3, chapter 7.4.13.5 */
pr_info("spurious APIC interrupt on CPU#%d, "
"should never happen.\n", smp_processor_id());
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
}
@@ -1814,6 +1819,7 @@ void smp_error_interrupt(struct pt_regs *regs)
exit_idle();
irq_enter();
+ trace_irq_entry(ERROR_APIC_VECTOR, NULL, NULL);
/* First tickle the hardware, only then report what went on. -- REW */
v = apic_read(APIC_ESR);
apic_write(APIC_ESR, 0);
@@ -1834,6 +1840,7 @@ void smp_error_interrupt(struct pt_regs *regs)
*/
pr_debug("APIC error on CPU%d: %02x(%02x)\n",
smp_processor_id(), v , v1);
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
}
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 0e4f24c2a74..60939d5f226 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -227,6 +227,7 @@
#include <linux/suspend.h>
#include <linux/kthread.h>
#include <linux/jiffies.h>
+#include <linux/idle.h>
#include <asm/system.h>
#include <asm/uaccess.h>
@@ -235,6 +236,7 @@
#include <asm/olpc.h>
#include <asm/paravirt.h>
#include <asm/reboot.h>
+#include <asm/idle.h>
#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
extern int (*console_blank_hook)(int);
@@ -947,10 +949,17 @@ recalc:
break;
}
}
+ enter_idle();
if (original_pm_idle)
original_pm_idle();
else
default_idle();
+ /*
+ * In many cases the interrupt that ended idle
+ * has already called exit_idle. But some idle
+ * loops can be woken up without an interrupt.
+ */
+ __exit_idle();
local_irq_disable();
jiffies_since_last_check = jiffies - last_jiffies;
if (jiffies_since_last_check > idle_period)
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 1a4088dda37..677f8475d9d 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -111,6 +111,7 @@ void foo(void)
OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable);
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 4a6aeedcd96..1aea11cd840 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -58,6 +58,7 @@ int main(void)
OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable);
OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_iret, pv_cpu_ops, iret);
+ OFFSET(PV_CPU_nmi_return, pv_cpu_ops, nmi_return);
OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 1d59834396b..6052f6f65a6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1069,6 +1069,7 @@ unsigned long kernel_eflags;
* debugging, no special alignment required.
*/
DEFINE_PER_CPU(struct orig_ist, orig_ist);
+EXPORT_PER_CPU_SYMBOL_GPL(orig_ist);
#else /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 6f8c5e9da97..c8a6411d8ba 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -23,6 +23,7 @@
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <trace/irq.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -402,8 +403,10 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
{
exit_idle();
irq_enter();
+ trace_irq_entry(THERMAL_APIC_VECTOR, regs, NULL);
inc_irq_stat(irq_thermal_count);
smp_thermal_vector();
+ trace_irq_exit(IRQ_HANDLED);
irq_exit();
/* Ack only at the end to avoid potential reentry */
ack_APIC_irq();
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index df20723a6a1..6bed23e1c74 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,6 +15,7 @@
#include <linux/bug.h>
#include <linux/nmi.h>
#include <linux/sysfs.h>
+#include <linux/ltt-core.h>
#include <asm/stacktrace.h>
@@ -253,6 +254,8 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
if (!signr)
return;
+ if (in_nmi())
+ panic("Fatal exception in non-maskable interrupt");
if (in_interrupt())
panic("Fatal exception in interrupt");
if (panic_on_oops)
@@ -277,6 +280,10 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
printk("DEBUG_PAGEALLOC");
#endif
printk("\n");
+#ifdef CONFIG_LTT
+ printk(KERN_EMERG "LTT NESTING LEVEL: %u", __get_cpu_var(ltt_nesting));
+ printk("\n");
+#endif
sysfs_printk_last_file();
if (notify_die(DIE_OOPS, str, regs, err,
current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index c8b4efad7eb..2fae6c570fd 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -80,6 +80,8 @@
#define nr_syscalls ((syscall_table_size)/4)
+#define NMI_MASK 0x04000000
+
#ifdef CONFIG_PREEMPT
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
@@ -321,8 +323,32 @@ END(ret_from_fork)
# userspace resumption stub bypassing syscall exit tracing
ALIGN
RING0_PTREGS_FRAME
+
ret_from_exception:
preempt_stop(CLBR_ANY)
+ GET_THREAD_INFO(%ebp)
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
+ movb PT_CS(%esp), %al
+ andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
+ cmpl $USER_RPL, %eax
+ jae resume_userspace # returning to v8086 or userspace
+ testl $NMI_MASK,TI_preempt_count(%ebp)
+ jz resume_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF, PT_EFLAGS(%esp)
+ jnz resume_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. This should not
+ * happen: it would reactivate NMIs
+ * prematurely.
+ */
+ TRACE_IRQS_IRET
+ RESTORE_REGS
+ addl $4, %esp # skip orig_eax/error_code
+ CFI_ADJUST_CFA_OFFSET -4
+ INTERRUPT_RETURN_NMI_SAFE
+
ret_from_intr:
GET_THREAD_INFO(%ebp)
check_userspace:
@@ -906,6 +932,10 @@ ENTRY(native_iret)
.previous
END(native_iret)
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE # Should we deal with a popf exception?
+END(native_nmi_return)
+
ENTRY(native_irq_enable_sysexit)
sti
sysexit
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index aed1ffbeb0c..c841c0fb5cc 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -163,6 +163,8 @@ GLOBAL(return_to_handler)
#endif
+#define NMI_MASK 0x04000000
+
#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
#endif
@@ -515,6 +517,8 @@ sysret_check:
/* Handle reschedules */
/* edx: work, edi: workmask */
sysret_careful:
+ testl $_TIF_KERNEL_TRACE,%edx /* Re-read : concurrently changed */
+ jnz ret_from_sys_call_trace
bt $TIF_NEED_RESCHED,%edx
jnc sysret_signal
TRACE_IRQS_ON
@@ -524,6 +528,16 @@ sysret_careful:
popq_cfi %rdi
jmp sysret_check
+ret_from_sys_call_trace:
+ TRACE_IRQS_ON
+ sti
+ SAVE_REST
+ FIXUP_TOP_OF_STACK %rdi
+ movq %rsp,%rdi
+ LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ jmp int_ret_from_sys_call
+
/* Handle a signal */
sysret_signal:
TRACE_IRQS_ON
@@ -872,6 +886,9 @@ ENTRY(native_iret)
.section __ex_table,"a"
.quad native_iret, bad_iret
.previous
+
+ENTRY(native_nmi_return)
+ NATIVE_INTERRUPT_RETURN_NMI_SAFE
#endif
.section .fixup,"ax"
@@ -924,6 +941,24 @@ retint_signal:
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
+ /* Returning to kernel space from exception. */
+ /* rcx: threadinfo. interrupts off. */
+ENTRY(retexc_kernel)
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz retint_kernel /* Not nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-ARGOFFSET(%rsp) /* trap flag? */
+ jnz retint_kernel /*
+ * If single-stepping an NMI handler,
+ * use the normal iret path instead of
+ * the popf/lret because lret would be
+ * single-stepped. This should not
+ * happen: it would reactivate NMIs
+ * prematurely.
+ */
+ RESTORE_ARGS 0,8,0
+ TRACE_IRQS_IRETQ
+ INTERRUPT_RETURN_NMI_SAFE
+
#ifdef CONFIG_PREEMPT
/* Returning to kernel space. Check if we need preemption */
/* rcx: threadinfo. interrupts off. */
@@ -1361,12 +1396,18 @@ ENTRY(paranoid_exit)
paranoid_swapgs:
TRACE_IRQS_IRETQ 0
SWAPGS_UNSAFE_STACK
+paranoid_restore_no_nmi:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
+ GET_THREAD_INFO(%rcx)
TRACE_IRQS_IRETQ 0
+ testl $NMI_MASK,TI_preempt_count(%rcx)
+ jz paranoid_restore_no_nmi /* Nested over NMI ? */
+ testw $X86_EFLAGS_TF,EFLAGS-0(%rsp) /* trap flag? */
+ jnz paranoid_restore_no_nmi
RESTORE_ALL 8
- jmp irq_return
+ INTERRUPT_RETURN_NMI_SAFE
paranoid_userspace:
GET_THREAD_INFO(%rcx)
movl TI_flags(%rcx),%ebx
@@ -1465,7 +1506,7 @@ ENTRY(error_exit)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
testl %eax,%eax
- jne retint_kernel
+ jne retexc_kernel
LOCKDEP_SYS_EXIT_IRQ
movl TI_flags(%rcx),%edx
movl $_TIF_WORK_MASK,%edi
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 869e1aeeb71..1fc5da98373 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -156,6 +156,7 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
ret = paravirt_patch_ident_64(insnbuf, len);
else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
+ type == PARAVIRT_PATCH(pv_cpu_ops.nmi_return) ||
type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret64))
@@ -204,6 +205,7 @@ static void native_flush_tlb_single(unsigned long addr)
/* These are in entry.S */
extern void native_iret(void);
+extern void native_nmi_return(void);
extern void native_irq_enable_sysexit(void);
extern void native_usergs_sysret32(void);
extern void native_usergs_sysret64(void);
@@ -373,6 +375,7 @@ struct pv_cpu_ops pv_cpu_ops = {
.usergs_sysret64 = native_usergs_sysret64,
#endif
.iret = native_iret,
+ .nmi_return = native_nmi_return,
.swapgs = native_swapgs,
.set_iopl_mask = native_set_iopl_mask,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index d9f32e6d6ab..ac372778bbc 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -1,10 +1,13 @@
-#include <asm/paravirt.h>
+#include <linux/stringify.h>
+#include <linux/irqflags.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "push %eax; popf");
DEF_NATIVE(pv_irq_ops, save_fl, "pushf; pop %eax");
DEF_NATIVE(pv_cpu_ops, iret, "iret");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_cpu_ops, irq_enable_sysexit, "sti; sysexit");
DEF_NATIVE(pv_mmu_ops, read_cr2, "mov %cr2, %eax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "mov %eax, %cr3");
@@ -41,6 +44,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, restore_fl);
PATCH_SITE(pv_irq_ops, save_fl);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 3f08f34f93e..5339e67dc15 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -1,12 +1,15 @@
+#include <linux/irqflags.h>
+#include <linux/stringify.h>
#include <asm/paravirt.h>
#include <asm/asm-offsets.h>
-#include <linux/stringify.h>
DEF_NATIVE(pv_irq_ops, irq_disable, "cli");
DEF_NATIVE(pv_irq_ops, irq_enable, "sti");
DEF_NATIVE(pv_irq_ops, restore_fl, "pushq %rdi; popfq");
DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_cpu_ops, iret, "iretq");
+DEF_NATIVE(pv_cpu_ops, nmi_return,
+ __stringify(NATIVE_INTERRUPT_RETURN_NMI_SAFE));
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
@@ -51,6 +54,7 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_irq_ops, irq_enable);
PATCH_SITE(pv_irq_ops, irq_disable);
PATCH_SITE(pv_cpu_ops, iret);
+ PATCH_SITE(pv_cpu_ops, nmi_return);
PATCH_SITE(pv_cpu_ops, irq_enable_sysexit);
PATCH_SITE(pv_cpu_ops, usergs_sysret32);
PATCH_SITE(pv_cpu_ops, usergs_sysret64);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ff455419898..e0e4ffcad48 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -13,6 +13,7 @@
#include <linux/dmi.h>
#include <linux/utsname.h>
#include <trace/events/power.h>
+#include <trace/sched.h>
#include <linux/hw_breakpoint.h>
#include <asm/cpu.h>
#include <asm/system.h>
@@ -23,6 +24,8 @@
#include <asm/i387.h>
#include <asm/debugreg.h>
+DEFINE_TRACE(sched_kthread_create);
+
struct kmem_cache *task_xstate_cachep;
EXPORT_SYMBOL_GPL(task_xstate_cachep);
@@ -278,6 +281,7 @@ extern void kernel_thread_helper(void);
int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
{
struct pt_regs regs;
+ long pid;
memset(&regs, 0, sizeof(regs));
@@ -299,7 +303,10 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
regs.flags = X86_EFLAGS_IF | 0x2;
/* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+ pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED,
+ 0, &regs, 0, NULL, NULL);
+ trace_sched_kthread_create(fn, pid);
+ return pid;
}
EXPORT_SYMBOL(kernel_thread);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..3a8c9ee0fc6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,9 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/idle.h>
+#include <trace/pm.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -59,6 +62,38 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
+void enter_idle(void)
+{
+ percpu_write(is_idle, 1);
+ trace_pm_idle_entry();
+ notify_idle(IDLE_START);
+}
+EXPORT_SYMBOL_GPL(enter_idle);
+
+void __exit_idle(void)
+{
+ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
+ return;
+ notify_idle(IDLE_END);
+ trace_pm_idle_exit();
+}
+EXPORT_SYMBOL_GPL(__exit_idle);
+
+/* Called from interrupts to signify idle end */
+void exit_idle(void)
+{
+ /* idle loop has pid 0 */
+ if (current->pid)
+ return;
+ __exit_idle();
+}
+EXPORT_SYMBOL_GPL(exit_idle);
+
/*
* Return saved PC of a blocked thread.
*/
@@ -107,10 +142,18 @@ void cpu_idle(void)
play_dead();
local_irq_disable();
+ enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
pm_idle();
start_critical_timings();
+ /*
+ * In many cases the interrupt that ended idle
+ * has already called exit_idle. But some idle
+ * loops can be woken up without an interrupt.
+ */
+ __exit_idle();
+ trace_power_end(smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
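__exit_idle() is now reachable from both the interrupt that ends idle and the idle loop itself; the x86_test_and_clear_bit_percpu() makes the second call a no-op, so the pm_idle_exit event fires exactly once per idle period. A userspace sketch of that idempotent pattern (a plain int stands in for the per-CPU atomic bit):

    #include <stdio.h>

    static int is_idle;

    static void exit_idle_sketch(void)
    {
            int was_idle = is_idle;         /* kernel: atomic test-and-clear */

            is_idle = 0;
            if (!was_idle)
                    return;
            printf("pm_idle_exit\n");       /* emitted once per idle period */
    }

    int main(void)
    {
            is_idle = 1;
            exit_idle_sketch();     /* interrupt path: emits the event */
            exit_idle_sketch();     /* idle-loop path: no-op */
            return 0;
    }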
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b..b21b379013e 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -35,8 +35,10 @@
#include <linux/tick.h>
#include <linux/prctl.h>
#include <linux/uaccess.h>
+#include <linux/idle.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <trace/pm.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -51,37 +53,36 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
+DEFINE_TRACE(pm_idle_exit);
+DEFINE_TRACE(pm_idle_entry);
+
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
static DEFINE_PER_CPU(unsigned char, is_idle);
-static ATOMIC_NOTIFIER_HEAD(idle_notifier);
-
-void idle_notifier_register(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_register);
-
-void idle_notifier_unregister(struct notifier_block *n)
-{
- atomic_notifier_chain_unregister(&idle_notifier, n);
-}
-EXPORT_SYMBOL_GPL(idle_notifier_unregister);
-
void enter_idle(void)
{
percpu_write(is_idle, 1);
- atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
+ /*
+ * Trace last event before calling notifiers. Notifiers flush
+ * data from buffers before going idle.
+ */
+ trace_pm_idle_entry();
+ notify_idle(IDLE_START);
}
+EXPORT_SYMBOL_GPL(enter_idle);
-static void __exit_idle(void)
+void __exit_idle(void)
{
if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
return;
- atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
+ notify_idle(IDLE_END);
+ trace_pm_idle_exit();
}
+EXPORT_SYMBOL_GPL(__exit_idle);
/* Called from interrupts to signify idle end */
void exit_idle(void)
@@ -91,6 +92,7 @@ void exit_idle(void)
return;
__exit_idle();
}
+EXPORT_SYMBOL_GPL(exit_idle);
#ifndef CONFIG_SMP
static inline void play_dead(void)
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 45892dc4b72..ee3024d4f61 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
#include <linux/signal.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
+#include <trace/syscall.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -152,6 +153,9 @@ static const int arg_offs_table[] = {
X86_EFLAGS_DF | X86_EFLAGS_OF | \
X86_EFLAGS_RF | X86_EFLAGS_AC))
+DEFINE_TRACE(syscall_entry);
+DEFINE_TRACE(syscall_exit);
+
/*
* Determines whether a value may be installed in a segment register.
*/
@@ -1361,6 +1365,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
if (test_thread_flag(TIF_SINGLESTEP))
regs->flags |= X86_EFLAGS_TF;
+ trace_syscall_entry(regs, regs->orig_ax);
+
/* do the secure computing check first */
secure_computing(regs->orig_ax);
@@ -1396,6 +1402,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
{
bool step;
+ trace_syscall_exit(regs->ax);
+
if (unlikely(current->audit_context))
audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
diff --git a/arch/x86/kernel/syscall_64.c b/arch/x86/kernel/syscall_64.c
index de87d600829..5e74f6aa3c0 100644
--- a/arch/x86/kernel/syscall_64.c
+++ b/arch/x86/kernel/syscall_64.c
@@ -1,8 +1,11 @@
/* System call table for x86-64. */
#include <linux/linkage.h>
+#include <linux/module.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
#include <asm/asm-offsets.h>
#define __NO_STUBS
@@ -27,3 +30,18 @@ const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/unistd_64.h>
};
+
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < __NR_syscall_max + 1; i++) {
+ sprint_symbol(namebuf, (unsigned long)sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table,
+ call_data,
+ "id %d address %p symbol %s",
+ i, (void*)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
diff --git a/arch/x86/kernel/trace-clock.c b/arch/x86/kernel/trace-clock.c
new file mode 100644
index 00000000000..47539e28276
--- /dev/null
+++ b/arch/x86/kernel/trace-clock.c
@@ -0,0 +1,302 @@
+/*
+ * arch/x86/kernel/trace-clock.c
+ *
+ * Trace clock for x86.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>, October 2008
+ */
+
+#include <linux/module.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/cpu.h>
+#include <linux/posix-timers.h>
+#include <asm/vgtod.h>
+
+static cycles_t trace_clock_last_tsc;
+static DEFINE_PER_CPU(struct timer_list, update_timer);
+static DEFINE_SPINLOCK(async_tsc_lock);
+static int async_tsc_refcount; /* Number of readers */
+static int async_tsc_enabled; /* Async TSC enabled on all online CPUs */
+
+int _trace_clock_is_sync = 1;
+EXPORT_SYMBOL_GPL(_trace_clock_is_sync);
+
+/*
+ * Is the trace clock being used by user-space? We leave the trace clock active
+ * as soon as user-space starts using it. We never unref the trace clock
+ * reference taken by user-space.
+ */
+static atomic_t user_trace_clock_ref;
+
+/*
+ * Called by check_tsc_sync_source from CPU hotplug.
+ */
+void set_trace_clock_is_sync(int state)
+{
+ _trace_clock_is_sync = state;
+ update_trace_clock_is_sync_vdso();
+}
+
+#if BITS_PER_LONG == 64
+static cycles_t read_last_tsc(void)
+{
+ return trace_clock_last_tsc;
+}
+#else
+/*
+ * A cmpxchg64 update can happen concurrently. Based on the assumption that
+ * two cmpxchg64 updates will never set it to the same value (the count always
+ * increases), reading it twice ensures that we read a coherent value with the
+ * same "sequence number".
+ */
+static cycles_t read_last_tsc(void)
+{
+ cycles_t val1, val2;
+
+ val1 = trace_clock_last_tsc;
+ for (;;) {
+ val2 = val1;
+ barrier();
+ val1 = trace_clock_last_tsc;
+ if (likely(val1 == val2))
+ break;
+ }
+ return val1;
+}
+#endif
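
A minimal sketch of the same read-twice idiom outside the kernel, under the
same assumption that no two updates ever write the same value (hypothetical
names; an inline asm statement stands in for the kernel's barrier()):

#include <stdint.h>

static volatile uint64_t shared_ts;	/* updated only upwards by writers */

static uint64_t read_u64_coherent(void)
{
	uint64_t v1, v2;

	v1 = shared_ts;
	for (;;) {
		v2 = v1;
		__asm__ __volatile__("" ::: "memory");	/* compiler barrier */
		v1 = shared_ts;
		/*
		 * Two successive loads can only match when no update tore
		 * them apart, since no two updates store the same value.
		 */
		if (v1 == v2)
			break;
	}
	return v1;
}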
+
+/*
+ * Support for architectures with non-sync TSCs.
+ * When the local TSC is discovered to lag behind the highest TSC counter, we
+ * increment the TSC count by an amount that should ideally be lower than the
+ * execution time of this routine, in cycles: this is the granularity we look
+ * for, since we must be able to order the events.
+ */
+notrace cycles_t trace_clock_async_tsc_read(void)
+{
+ cycles_t new_tsc, last_tsc;
+
+ WARN_ON(!async_tsc_refcount || !async_tsc_enabled);
+ new_tsc = get_cycles();
+ last_tsc = read_last_tsc();
+ do {
+ if (new_tsc < last_tsc)
+ new_tsc = last_tsc + TRACE_CLOCK_MIN_PROBE_DURATION;
+ /*
+ * If cmpxchg fails with a value higher than the new_tsc, don't
+ * retry: the value has been incremented and the events
+ * happened almost at the same time.
+ * We must retry if cmpxchg fails with a lower value:
+ * it means that we are the CPU with the highest frequency and
+ * therefore MUST update the value.
+ */
+ last_tsc = cmpxchg64(&trace_clock_last_tsc, last_tsc, new_tsc);
+ } while (unlikely(last_tsc < new_tsc));
+ return new_tsc;
+}
+EXPORT_SYMBOL_GPL(trace_clock_async_tsc_read);
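
The retry rule in the loop above is what keeps the clock monotonic: a failed
cmpxchg against a higher value means another CPU already published a later
timestamp, so ours is ordered; a failed cmpxchg against a lower value means we
hold the newest timestamp and must publish it. A self-contained sketch of the
same monotonic-max scheme in C11 atomics (read_raw_cycles() and the step
constant are hypothetical stand-ins):

#include <stdatomic.h>
#include <stdint.h>

#define MIN_PROBE_DURATION 200		/* assumed minimal forward step, cycles */

extern uint64_t read_raw_cycles(void);	/* hypothetical per-CPU cycle counter */

static _Atomic uint64_t last_ts;

uint64_t monotonic_clock_read(void)
{
	uint64_t new_ts = read_raw_cycles();
	uint64_t last = atomic_load(&last_ts);

	do {
		/* Never return less than the global maximum seen so far. */
		if (new_ts < last)
			new_ts = last + MIN_PROBE_DURATION;
		/*
		 * On failure, compare_exchange reloads 'last'; retry only
		 * while the published value is still below ours.
		 */
	} while (!atomic_compare_exchange_weak(&last_ts, &last, new_ts) &&
		 last < new_ts);
	return new_ts;
}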
+
+static void update_timer_ipi(void *info)
+{
+ (void)trace_clock_async_tsc_read();
+}
+
+/*
+ * update_timer_fct - Timer function to resync the clocks
+ * @data: unused
+ *
+ * Fires every jiffy.
+ */
+static void update_timer_fct(unsigned long data)
+{
+ (void)trace_clock_async_tsc_read();
+ mod_timer_pinned(&per_cpu(update_timer, smp_processor_id()),
+ jiffies + 1);
+}
+
+static void enable_trace_clock(int cpu)
+{
+ init_timer(&per_cpu(update_timer, cpu));
+ per_cpu(update_timer, cpu).function = update_timer_fct;
+ per_cpu(update_timer, cpu).expires = jiffies + 1;
+ smp_call_function_single(cpu, update_timer_ipi, NULL, 1);
+ add_timer_on(&per_cpu(update_timer, cpu), cpu);
+}
+
+static void disable_trace_clock(int cpu)
+{
+ del_timer_sync(&per_cpu(update_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Returns the success/failure of the operation (NOTIFY_OK or NOTIFY_BAD).
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int hotcpu = (unsigned long)hcpu;
+ int cpu;
+
+ spin_lock(&async_tsc_lock);
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ /*
+ * trace_clock_is_sync() is updated by set_trace_clock_is_sync()
+ * code, protected by cpu hotplug disable.
+ * It is ok to let the hotplugged CPU read the timebase before
+ * the CPU_ONLINE notification. It's just there to give a
+ * maximum bound to the TSC error.
+ */
+ if (async_tsc_refcount && !trace_clock_is_sync()) {
+ if (!async_tsc_enabled) {
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+ } else {
+ enable_trace_clock(hotcpu);
+ }
+ }
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
+ /*
+ * We cannot stop the trace clock on other CPUs when readers are
+ * active, even if we go back to a synchronized state (1 CPU),
+ * because the remaining CPU could be the one lagging behind.
+ */
+ if (async_tsc_refcount && async_tsc_enabled)
+ disable_trace_clock(hotcpu);
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ spin_unlock(&async_tsc_lock);
+
+ return NOTIFY_OK;
+}
+
+int get_trace_clock(void)
+{
+ int cpu;
+
+ if (!trace_clock_is_sync()) {
+ printk(KERN_WARNING
+ "Trace clock falls back on cache-line bouncing\n"
+ "workaround due to non-synchronized TSCs.\n"
+ "This workaround preserves event order across CPUs.\n"
+ "Please consider disabling Speedstep or PowerNow and\n"
+ "using kernel parameters "
+ "\"force_tsc_sync=1 idle=poll\"\n"
+ "for accurate and fast tracing clock source.\n");
+ }
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ if (async_tsc_refcount++ || trace_clock_is_sync())
+ goto end;
+
+ async_tsc_enabled = 1;
+ for_each_online_cpu(cpu)
+ enable_trace_clock(cpu);
+end:
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+ return 0;
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+ int cpu;
+
+ get_online_cpus();
+ spin_lock(&async_tsc_lock);
+ WARN_ON(async_tsc_refcount <= 0);
+ if (async_tsc_refcount != 1 || !async_tsc_enabled)
+ goto end;
+
+ for_each_online_cpu(cpu)
+ disable_trace_clock(cpu);
+ async_tsc_enabled = 0;
+end:
+ async_tsc_refcount--;
+ if (!async_tsc_refcount && num_online_cpus() == 1)
+ set_trace_clock_is_sync(1);
+ spin_unlock(&async_tsc_lock);
+ put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
+
+static int posix_get_trace(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+ int ret;
+
+ /*
+ * Yes, there is a race here that could lead to the refcount being
+ * incremented more than once, but all we care about is leaving the trace
+ * clock active forever, so precise accounting is not needed.
+ */
+ if (unlikely(!atomic_read(&user_trace_clock_ref))) {
+ ret = get_trace_clock();
+ if (ret)
+ return ret;
+ atomic_inc(&user_trace_clock_ref);
+ }
+ lts->lttng_ts = trace_clock_read64();
+ return 0;
+}
+
+static int posix_get_trace_freq(clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = trace_clock_frequency();
+ return 0;
+}
+
+static int posix_get_trace_res(const clockid_t which_clock, struct timespec *tp)
+{
+ union lttng_timespec *lts = (union lttng_timespec *) tp;
+
+ lts->lttng_ts = TRACE_CLOCK_RES;
+ return 0;
+}
+
+static __init int init_unsync_trace_clock(void)
+{
+ struct k_clock clock_trace = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace,
+ };
+ struct k_clock clock_trace_freq = {
+ .clock_getres = posix_get_trace_res,
+ .clock_get = posix_get_trace_freq,
+ };
+
+ register_posix_clock(CLOCK_TRACE, &clock_trace);
+ register_posix_clock(CLOCK_TRACE_FREQ, &clock_trace_freq);
+
+ hotcpu_notifier(hotcpu_callback, 4);
+ return 0;
+}
+early_initcall(init_unsync_trace_clock);
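
User space reaches the registered ids through the regular clock_gettime()
path; the 64-bit cycle count overlays the timespec via union lttng_timespec.
A hedged consumer sketch (the numeric ids are hypothetical stand-ins for
whatever CLOCK_TRACE and CLOCK_TRACE_FREQ expand to in this tree):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

#define CLOCK_TRACE_ID      14	/* assumption: matches CLOCK_TRACE */
#define CLOCK_TRACE_FREQ_ID 15	/* assumption: matches CLOCK_TRACE_FREQ */

int main(void)
{
	struct timespec ts;
	uint64_t cycles, freq;

	if (clock_gettime(CLOCK_TRACE_ID, &ts))
		return 1;
	memcpy(&cycles, &ts, sizeof(cycles));	/* lttng_ts overlays offset 0 */

	if (clock_gettime(CLOCK_TRACE_FREQ_ID, &ts))
		return 1;
	memcpy(&freq, &ts, sizeof(freq));

	printf("trace clock: %llu cycles @ %llu Hz\n",
	       (unsigned long long)cycles, (unsigned long long)freq);
	return 0;
}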
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b9b67166f9d..bc618945fb1 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -31,6 +31,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>
+#include <trace/trap.h>
#ifdef CONFIG_EISA
#include <linux/ioport.h>
@@ -52,6 +53,7 @@
#include <asm/atomic.h>
#include <asm/system.h>
#include <asm/traps.h>
+#include <asm/unistd.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mce.h>
@@ -76,11 +78,21 @@ char ignore_fpu_irq;
* F0 0F bug workaround.
*/
gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
+
+extern unsigned long sys_call_table[];
+extern unsigned long syscall_table_size;
+
#endif
DECLARE_BITMAP(used_vectors, NR_VECTORS);
EXPORT_SYMBOL_GPL(used_vectors);
+/*
+ * Also used in arch/x86/mm/fault.c.
+ */
+DEFINE_TRACE(trap_entry);
+DEFINE_TRACE(trap_exit);
+
static int ignore_nmis;
int unknown_nmi_panic;
@@ -122,6 +134,8 @@ do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
{
struct task_struct *tsk = current;
+ trace_trap_entry(regs, trapnr);
+
#ifdef CONFIG_X86_32
if (regs->flags & X86_VM_MASK) {
/*
@@ -168,7 +182,7 @@ trap_signal:
force_sig_info(signr, info, tsk);
else
force_sig(signr, tsk);
- return;
+ goto end;
kernel_trap:
if (!fixup_exception(regs)) {
@@ -176,15 +190,17 @@ kernel_trap:
tsk->thread.trap_no = trapnr;
die(str, regs, error_code);
}
- return;
+ goto end;
#ifdef CONFIG_X86_32
vm86_trap:
if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, trapnr))
goto trap_signal;
- return;
+ goto end;
#endif
+end:
+ trace_trap_exit();
}
#define DO_ERROR(trapnr, signr, str, name) \
@@ -285,7 +301,9 @@ do_general_protection(struct pt_regs *regs, long error_code)
printk("\n");
}
+ trace_trap_entry(regs, 13);
force_sig(SIGSEGV, tsk);
+ trace_trap_exit();
return;
#ifdef CONFIG_X86_32
@@ -398,13 +416,15 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
+ trace_trap_entry(regs, 2);
+
/*
* CPU-specific NMI must be processed before non-CPU-specific
* NMI, otherwise we may lose it, because the CPU-specific
* NMI can not be detected/processed on other CPUs.
*/
if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
- return;
+ goto end;
/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
raw_spin_lock(&nmi_reason_lock);
@@ -423,11 +443,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
reassert_nmi();
#endif
raw_spin_unlock(&nmi_reason_lock);
- return;
+ goto end;
}
raw_spin_unlock(&nmi_reason_lock);
unknown_nmi_error(reason, regs);
+end:
+ trace_trap_exit();
}
dotraplinkage notrace __kprobes void
@@ -570,8 +592,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
preempt_conditional_sti(regs);
if (regs->flags & X86_VM_MASK) {
+ trace_trap_entry(regs, 1);
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
+ trace_trap_exit();
preempt_conditional_cli(regs);
return;
}
@@ -589,13 +613,32 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
regs->flags &= ~X86_EFLAGS_TF;
}
si_code = get_si_code(tsk->thread.debugreg6);
- if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp)
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) {
+ trace_trap_entry(regs, 1);
send_sigtrap(tsk, regs, error_code, si_code);
+ trace_trap_exit();
+ }
preempt_conditional_cli(regs);
return;
}
+#ifdef CONFIG_X86_32
+void ltt_dump_sys_call_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < NR_syscalls; i++) {
+ sprint_symbol(namebuf, sys_call_table[i]);
+ __trace_mark(0, syscall_state, sys_call_table, call_data,
+ "id %d address %p symbol %s",
+ i, (void*)sys_call_table[i], namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_sys_call_table);
+#endif
+
/*
* Note that we play around with the 'TS' bit in an attempt to get
* the correct behaviour even in the presence of the asynchronous
@@ -701,11 +744,13 @@ do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
dotraplinkage void
do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
{
+ trace_trap_entry(regs, 16);
conditional_sti(regs);
#if 0
/* No need to warn about this any longer. */
printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
#endif
+ trace_trap_exit();
}
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
@@ -738,6 +783,21 @@ void __math_state_restore(void)
tsk->fpu_counter++;
}
+void ltt_dump_idt_table(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < IDT_ENTRIES; i++) {
+ unsigned long address = gate_offset(idt_table[i]);
+ sprint_symbol(namebuf, address);
+ __trace_mark(0, irq_state, idt_table, call_data,
+ "irq %d address %p symbol %s",
+ i, (void *)address, namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_idt_table);
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
deleted file mode 100644
index 0aa5fed8b9e..00000000000
--- a/arch/x86/kernel/tsc_sync.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * check TSC synchronization.
- *
- * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * We check whether all boot CPUs have their TSC's synchronized,
- * print a warning if not and turn off the TSC clock-source.
- *
- * The warp-check is point-to-point between two CPUs, the CPU
- * initiating the bootup is the 'source CPU', the freshly booting
- * CPU is the 'target CPU'.
- *
- * Only two CPUs may participate - they can enter in any order.
- * ( The serial nature of the boot logic and the CPU hotplug lock
- * protects against more than 2 CPUs entering this code. )
- */
-#include <linux/spinlock.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/smp.h>
-#include <linux/nmi.h>
-#include <asm/tsc.h>
-
-/*
- * Entry/exit counters that make sure that both CPUs
- * run the measurement code at once:
- */
-static __cpuinitdata atomic_t start_count;
-static __cpuinitdata atomic_t stop_count;
-
-/*
- * We use a raw spinlock in this exceptional case, because
- * we want to have the fastest, inlined, non-debug version
- * of a critical section, to be able to prove TSC time-warps:
- */
-static __cpuinitdata arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;
-
-static __cpuinitdata cycles_t last_tsc;
-static __cpuinitdata cycles_t max_warp;
-static __cpuinitdata int nr_warps;
-
-/*
- * TSC-warp measurement loop running on both CPUs:
- */
-static __cpuinit void check_tsc_warp(void)
-{
- cycles_t start, now, prev, end;
- int i;
-
- rdtsc_barrier();
- start = get_cycles();
- rdtsc_barrier();
- /*
- * The measurement runs for 20 msecs:
- */
- end = start + tsc_khz * 20ULL;
- now = start;
-
- for (i = 0; ; i++) {
- /*
- * We take the global lock, measure TSC, save the
- * previous TSC that was measured (possibly on
- * another CPU) and update the previous TSC timestamp.
- */
- arch_spin_lock(&sync_lock);
- prev = last_tsc;
- rdtsc_barrier();
- now = get_cycles();
- rdtsc_barrier();
- last_tsc = now;
- arch_spin_unlock(&sync_lock);
-
- /*
- * Be nice every now and then (and also check whether
- * measurement is done [we also insert a 10 million
- * loops safety exit, so we dont lock up in case the
- * TSC readout is totally broken]):
- */
- if (unlikely(!(i & 7))) {
- if (now > end || i > 10000000)
- break;
- cpu_relax();
- touch_nmi_watchdog();
- }
- /*
- * Outside the critical section we can now see whether
- * we saw a time-warp of the TSC going backwards:
- */
- if (unlikely(prev > now)) {
- arch_spin_lock(&sync_lock);
- max_warp = max(max_warp, prev - now);
- nr_warps++;
- arch_spin_unlock(&sync_lock);
- }
- }
- WARN(!(now-start),
- "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
- now-start, end-start);
-}
-
-/*
- * Source CPU calls into this - it waits for the freshly booted
- * target CPU to arrive and then starts the measurement:
- */
-void __cpuinit check_tsc_sync_source(int cpu)
-{
- int cpus = 2;
-
- /*
- * No need to check if we already know that the TSC is not
- * synchronized:
- */
- if (unsynchronized_tsc())
- return;
-
- if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) {
- if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING)
- pr_info(
- "Skipped synchronization checks as TSC is reliable.\n");
- return;
- }
-
- /*
- * Reset it - in case this is a second bootup:
- */
- atomic_set(&stop_count, 0);
-
- /*
- * Wait for the target to arrive:
- */
- while (atomic_read(&start_count) != cpus-1)
- cpu_relax();
- /*
- * Trigger the target to continue into the measurement too:
- */
- atomic_inc(&start_count);
-
- check_tsc_warp();
-
- while (atomic_read(&stop_count) != cpus-1)
- cpu_relax();
-
- if (nr_warps) {
- pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
- smp_processor_id(), cpu);
- pr_warning("Measured %Ld cycles TSC warp between CPUs, "
- "turning off TSC clock.\n", max_warp);
- mark_tsc_unstable("check_tsc_sync_source failed");
- } else {
- pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
- smp_processor_id(), cpu);
- }
-
- /*
- * Reset it - just in case we boot another CPU later:
- */
- atomic_set(&start_count, 0);
- nr_warps = 0;
- max_warp = 0;
- last_tsc = 0;
-
- /*
- * Let the target continue with the bootup:
- */
- atomic_inc(&stop_count);
-}
-
-/*
- * Freshly booted CPUs call into this:
- */
-void __cpuinit check_tsc_sync_target(void)
-{
- int cpus = 2;
-
- if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
- return;
-
- /*
- * Register this CPU's participation and wait for the
- * source CPU to start the measurement:
- */
- atomic_inc(&start_count);
- while (atomic_read(&start_count) != cpus)
- cpu_relax();
-
- check_tsc_warp();
-
- /*
- * Ok, we are done:
- */
- atomic_inc(&stop_count);
-
- /*
- * Wait for the source CPU to print stuff:
- */
- while (atomic_read(&stop_count) != cpus)
- cpu_relax();
-}
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dcbb28c4b69..df18f14c473 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -44,6 +44,8 @@
#include <asm/desc.h>
#include <asm/topology.h>
#include <asm/vgtod.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
#define __vsyscall(nr) \
__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
@@ -61,6 +63,7 @@ struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
.lock = SEQLOCK_UNLOCKED,
.sysctl_enabled = 1,
+ .trace_clock_is_sync = 1,
};
void update_vsyscall_tz(void)
@@ -73,6 +76,16 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
+void update_trace_clock_is_sync_vdso(void)
+{
+ unsigned long flags;
+
+ write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
+ vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+}
+EXPORT_SYMBOL_GPL(update_trace_clock_is_sync_vdso);
+
void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
struct clocksource *clock, u32 mult)
{
@@ -89,6 +102,7 @@ void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = *wtm;
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
+ vsyscall_gtod_data.trace_clock_is_sync = _trace_clock_is_sync;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 3cece05e4ac..f894af174b8 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,7 +32,7 @@
#include "irq.h"
#include <linux/kvm_host.h>
-#include "trace.h"
+#include <asm/kvm-trace.h>
static void pic_irq_request(struct kvm *kvm, int level);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 93cf9d0d365..58bcbce5b02 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -36,7 +36,7 @@
#include <asm/atomic.h>
#include "kvm_cache_regs.h"
#include "irq.h"
-#include "trace.h"
+#include <asm/kvm-trace.h>
#include "x86.h"
#ifndef CONFIG_X86_64
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f02b8edc3d4..3612044ed1f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -163,7 +163,7 @@ module_param(oos_shadow, bool, 0644);
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
-#include "mmutrace.h"
+#include <asm/kvm-mmutrace.h>
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 63fec1531e8..b14429dda24 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -34,7 +34,7 @@
#include <asm/kvm_para.h>
#include <asm/virtext.h>
-#include "trace.h"
+#include <asm/kvm-trace.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bf89ec2cfb8..d12b42e234b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -40,7 +40,7 @@
#include <asm/i387.h>
#include <asm/xcr.h>
-#include "trace.h"
+#include <asm/kvm-trace.h>
#define __ex(x) __kvm_handle_fault_on_reboot(x)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bcc0efce85b..6a8cb6fe5c1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -47,7 +47,7 @@
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
-#include "trace.h"
+#include <asm/kvm-trace.h>
#include <asm/debugreg.h>
#include <asm/msr.h>
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index eba687f0cc0..07f7a272226 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1276,6 +1276,7 @@ __init void lguest_init(void)
pv_cpu_ops.cpuid = lguest_cpuid;
pv_cpu_ops.load_idt = lguest_load_idt;
pv_cpu_ops.iret = lguest_iret;
+ pv_cpu_ops.nmi_return = lguest_iret;
pv_cpu_ops.load_sp0 = lguest_load_sp0;
pv_cpu_ops.load_tr_desc = lguest_load_tr_desc;
pv_cpu_ops.set_ldt = lguest_set_ldt;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 7d90ceb882a..00309849aa1 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -12,6 +12,7 @@
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
#include <linux/hugetlb.h> /* hstate_index_to_shift */
+#include <trace/fault.h>
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -35,6 +36,11 @@ enum x86_pf_error_code {
PF_INSTR = 1 << 4,
};
+DEFINE_TRACE(page_fault_entry);
+DEFINE_TRACE(page_fault_exit);
+DEFINE_TRACE(page_fault_nosem_entry);
+DEFINE_TRACE(page_fault_nosem_exit);
+
/*
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
@@ -720,6 +726,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
+ trace_page_fault_nosem_entry(regs, 14, address);
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
@@ -729,6 +736,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.trap_no = 14;
force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
+ trace_page_fault_nosem_exit();
return;
}
@@ -1124,7 +1132,9 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
+ trace_page_fault_entry(regs, 14, mm, vma, address, write);
fault = handle_mm_fault(mm, vma, address, flags);
+ trace_page_fault_exit(fault);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6acc724d5d8..14b9317eccb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/cpu.h>
+#include <trace/irq.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
@@ -141,6 +142,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
f = &flush_state[sender];
+ trace_irq_entry(sender, regs, NULL);
+
if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
goto out;
/*
@@ -167,6 +170,7 @@ out:
cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
smp_mb__after_clear_bit();
inc_irq_stat(irq_tlb_count);
+ trace_irq_exit(IRQ_HANDLED);
}
static void flush_tlb_others_ipi(const struct cpumask *cpumask,
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index ee55754cc3c..7bc481508d0 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -22,6 +22,8 @@
#include <asm/hpet.h>
#include <asm/unistd.h>
#include <asm/io.h>
+#include <asm/trace-clock.h>
+#include <asm/timer.h>
#include "vextern.h"
#define gtod vdso_vsyscall_gtod_data
@@ -111,6 +113,46 @@ notrace static noinline int do_monotonic_coarse(struct timespec *ts)
return 0;
}
+/*
+ * If the TSC is synchronized across all CPUs, read the current TSC
+ * and export its value in the nsec field of the timespec.
+ */
+notrace static noinline int do_trace_clock(struct timespec *ts)
+{
+ unsigned long seq;
+ union lttng_timespec *lts = (union lttng_timespec *) ts;
+
+ do {
+ seq = read_seqbegin(&gtod->lock);
+ if (unlikely(!gtod->trace_clock_is_sync))
+ return vdso_fallback_gettime(CLOCK_TRACE, ts);
+ /*
+ * We don't protect the rdtsc with the rdtsc_barrier because
+ * we can't obtain that level of precision with tracing anyway.
+ * The operation of recording an event is not atomic; therefore,
+ * the small chance of imprecision doesn't justify the overhead
+ * of a barrier.
+ */
+ /*
+ * TODO: check that vget_cycles(), using paravirt ops, will
+ * match the TSC read by get_cycles() at the kernel level.
+ */
+ lts->lttng_ts = vget_cycles();
+ } while (unlikely(read_seqretry(&gtod->lock, seq)));
+
+ return 0;
+}
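
The loop above is the classic seqlock reader: sample the sequence count, read
the protected data, and retry if a writer bumped the count meanwhile. A
simplified generic sketch (compiler barriers only; a real SMP implementation
also needs memory barriers):

#include <stdint.h>

struct seq_protected {
	volatile unsigned seq;	/* even = stable, odd = write in progress */
	uint64_t value;
};

static uint64_t seq_read(struct seq_protected *d)
{
	unsigned start;
	uint64_t v;

	do {
		while ((start = d->seq) & 1)
			;	/* spin while a writer holds the lock */
		__asm__ __volatile__("" ::: "memory");
		v = d->value;
		__asm__ __volatile__("" ::: "memory");
	} while (d->seq != start);	/* a writer intervened: retry */
	return v;
}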
+
+/*
+ * Returns the cpu_khz. It needs to be a syscall because we can't access
+ * this value from userspace, and it will only be called at the beginning
+ * of the tracing session.
+ */
+notrace static noinline int do_trace_clock_freq(struct timespec *ts)
+{
+ return vdso_fallback_gettime(CLOCK_TRACE_FREQ, ts);
+}
+
notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
{
if (likely(gtod->sysctl_enabled))
@@ -127,6 +169,12 @@ notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
return do_realtime_coarse(ts);
case CLOCK_MONOTONIC_COARSE:
return do_monotonic_coarse(ts);
+ case CLOCK_TRACE:
+ return do_trace_clock(ts);
+ case CLOCK_TRACE_FREQ:
+ return do_trace_clock_freq(ts);
+ default:
+ return -EINVAL;
}
return vdso_fallback_gettime(clock, ts);
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 50542efe45f..e3839c74ec4 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -974,6 +974,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
.read_pmc = native_read_pmc,
.iret = xen_iret,
+ .nmi_return = xen_iret,
.irq_enable_sysexit = xen_sysexit,
#ifdef CONFIG_X86_64
.usergs_sysret32 = xen_sysret32,
diff --git a/arch/xtensa/include/asm/thread_info.h b/arch/xtensa/include/asm/thread_info.h
index 7be8accb0b0..a380dcf32a5 100644
--- a/arch/xtensa/include/asm/thread_info.h
+++ b/arch/xtensa/include/asm/thread_info.h
@@ -131,6 +131,7 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_IRET 4 /* return with iret */
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 6 /* restore signal mask in do_signal() */
+#define TIF_KERNEL_TRACE 7 /* kernel trace active */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_FREEZE 17 /* is freezing for suspend */
@@ -139,11 +140,12 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_NEED_RESCHED (1<<TIF_NEED_RESCHED)
#define _TIF_SINGLESTEP (1<<TIF_SINGLESTEP)
#define _TIF_IRET (1<<TIF_IRET)
+#define _TIF_KERNEL_TRACE (1<<TIF_KERNEL_TRACE)
#define _TIF_POLLING_NRFLAG (1<<TIF_POLLING_NRFLAG)
#define _TIF_RESTORE_SIGMASK (1<<TIF_RESTORE_SIGMASK)
#define _TIF_FREEZE (1<<TIF_FREEZE)
-#define _TIF_WORK_MASK 0x0000FFFE /* work to do on interrupt/exception return */
+#define _TIF_WORK_MASK 0x0000FF7E /* work to do on interrupt/exception return */
#define _TIF_ALLWORK_MASK 0x0000FFFF /* work to do on any return to u-space */
/*
diff --git a/drivers/idle/i7300_idle.c b/drivers/idle/i7300_idle.c
index c976285d313..f7a7d69c3a6 100644
--- a/drivers/idle/i7300_idle.c
+++ b/drivers/idle/i7300_idle.c
@@ -27,6 +27,7 @@
#include <linux/debugfs.h>
#include <linux/stop_machine.h>
#include <linux/i7300_idle.h>
+#include <linux/idle.h>
#include <asm/idle.h>
@@ -584,7 +585,7 @@ static int __init i7300_idle_init(void)
}
}
- idle_notifier_register(&i7300_idle_nb);
+ register_idle_notifier(&i7300_idle_nb);
printk(KERN_INFO "i7300_idle: loaded v%s\n", I7300_IDLE_DRIVER_VERSION);
return 0;
@@ -592,7 +593,7 @@ static int __init i7300_idle_init(void)
static void __exit i7300_idle_exit(void)
{
- idle_notifier_unregister(&i7300_idle_nb);
+ unregister_idle_notifier(&i7300_idle_nb);
free_cpumask_var(idle_cpumask);
if (debugfs_dir) {
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 11905b6a302..38930b01cb2 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -218,6 +218,9 @@ static void input_handle_event(struct input_dev *dev,
{
int disposition = INPUT_IGNORE_EVENT;
+ trace_mark(input, input_event,
+ "type %u code %u value %d", type, code, value);
+
switch (type) {
case EV_SYN:
diff --git a/drivers/net/wan/hd64570.c b/drivers/net/wan/hd64570.c
index a3ea27ce04f..33e920aa3a7 100644
--- a/drivers/net/wan/hd64570.c
+++ b/drivers/net/wan/hd64570.c
@@ -104,7 +104,7 @@ static inline u16 desc_abs_number(port_t *port, u16 desc, int transmit)
}
-static inline u16 desc_offset(port_t *port, u16 desc, int transmit)
+static inline u16 hd_desc_offset(port_t *port, u16 desc, int transmit)
{
/* Descriptor offset always fits in 16 bits */
return desc_abs_number(port, desc, transmit) * sizeof(pkt_desc);
@@ -116,10 +116,10 @@ static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
{
#ifdef PAGE0_ALWAYS_MAPPED
return (pkt_desc __iomem *)(win0base(port_to_card(port))
- + desc_offset(port, desc, transmit));
+ + hd_desc_offset(port, desc, transmit));
#else
return (pkt_desc __iomem *)(winbase(port_to_card(port))
- + desc_offset(port, desc, transmit));
+ + hd_desc_offset(port, desc, transmit));
#endif
}
@@ -169,7 +169,7 @@ static void sca_init_port(port_t *port)
for (i = 0; i < buffs; i++) {
pkt_desc __iomem *desc = desc_address(port, i, transmit);
- u16 chain_off = desc_offset(port, i + 1, transmit);
+ u16 chain_off = hd_desc_offset(port, i + 1, transmit);
u32 buff_off = buffer_offset(port, i, transmit);
writew(chain_off, &desc->cp);
@@ -187,12 +187,12 @@ static void sca_init_port(port_t *port)
/* current desc addr */
sca_out(0, dmac + CPB, card); /* pointer base */
- sca_outw(desc_offset(port, 0, transmit), dmac + CDAL, card);
+ sca_outw(hd_desc_offset(port, 0, transmit), dmac + CDAL, card);
if (!transmit)
- sca_outw(desc_offset(port, buffs - 1, transmit),
+ sca_outw(hd_desc_offset(port, buffs - 1, transmit),
dmac + EDAL, card);
else
- sca_outw(desc_offset(port, 0, transmit), dmac + EDAL,
+ sca_outw(hd_desc_offset(port, 0, transmit), dmac + EDAL,
card);
/* clear frame end interrupt counter */
@@ -305,7 +305,7 @@ static inline void sca_rx_intr(port_t *port)
dev->stats.rx_over_errors++;
while (1) {
- u32 desc_off = desc_offset(port, port->rxin, 0);
+ u32 desc_off = hd_desc_offset(port, port->rxin, 0);
pkt_desc __iomem *desc;
u32 cda = sca_inw(dmac + CDAL, card);
@@ -359,7 +359,7 @@ static inline void sca_tx_intr(port_t *port)
while (1) {
pkt_desc __iomem *desc;
- u32 desc_off = desc_offset(port, port->txlast, 1);
+ u32 desc_off = hd_desc_offset(port, port->txlast, 1);
u32 cda = sca_inw(dmac + CDAL, card);
if ((cda >= desc_off) && (cda < desc_off + sizeof(pkt_desc)))
break; /* Transmitter is/will_be sending this frame */
@@ -660,7 +660,7 @@ static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
writeb(ST_TX_EOM, &desc->stat);
port->txin = next_desc(port, port->txin, 1);
- sca_outw(desc_offset(port, port->txin, 1),
+ sca_outw(hd_desc_offset(port, port->txin, 1),
get_dmac_tx(port) + EDAL, card);
sca_out(DSR_DE, DSR_TX(phy_node(port)), card); /* Enable TX DMA */
diff --git a/drivers/net/wan/hd64572.c b/drivers/net/wan/hd64572.c
index e305274f83f..5c801ea8cc0 100644
--- a/drivers/net/wan/hd64572.c
+++ b/drivers/net/wan/hd64572.c
@@ -87,7 +87,7 @@ static inline u16 desc_abs_number(port_t *port, u16 desc, int transmit)
}
-static inline u16 desc_offset(port_t *port, u16 desc, int transmit)
+static inline u16 hd_desc_offset(port_t *port, u16 desc, int transmit)
{
/* Descriptor offset always fits in 16 bits */
return desc_abs_number(port, desc, transmit) * sizeof(pkt_desc);
@@ -98,7 +98,7 @@ static inline pkt_desc __iomem *desc_address(port_t *port, u16 desc,
int transmit)
{
return (pkt_desc __iomem *)(port->card->rambase +
- desc_offset(port, desc, transmit));
+ hd_desc_offset(port, desc, transmit));
}
@@ -143,7 +143,7 @@ static void sca_init_port(port_t *port)
for (i = 0; i < buffs; i++) {
pkt_desc __iomem *desc = desc_address(port, i, transmit);
- u16 chain_off = desc_offset(port, i + 1, transmit);
+ u16 chain_off = hd_desc_offset(port, i + 1, transmit);
u32 buff_off = buffer_offset(port, i, transmit);
writel(chain_off, &desc->cp);
@@ -162,11 +162,11 @@ static void sca_init_port(port_t *port)
sca_out(DCR_ABORT, DCR_TX(port->chan), card);
/* current desc addr */
- sca_outl(desc_offset(port, 0, 0), dmac_rx + CDAL, card);
- sca_outl(desc_offset(port, card->tx_ring_buffers - 1, 0),
+ sca_outl(hd_desc_offset(port, 0, 0), dmac_rx + CDAL, card);
+ sca_outl(hd_desc_offset(port, card->tx_ring_buffers - 1, 0),
dmac_rx + EDAL, card);
- sca_outl(desc_offset(port, 0, 1), dmac_tx + CDAL, card);
- sca_outl(desc_offset(port, 0, 1), dmac_tx + EDAL, card);
+ sca_outl(hd_desc_offset(port, 0, 1), dmac_tx + CDAL, card);
+ sca_outl(hd_desc_offset(port, 0, 1), dmac_tx + EDAL, card);
/* clear frame end interrupt counter */
sca_out(DCR_CLEAR_EOF, DCR_RX(port->chan), card);
@@ -249,7 +249,7 @@ static inline int sca_rx_done(port_t *port, int budget)
dev->stats.rx_over_errors++;
while (received < budget) {
- u32 desc_off = desc_offset(port, port->rxin, 0);
+ u32 desc_off = hd_desc_offset(port, port->rxin, 0);
pkt_desc __iomem *desc;
u32 cda = sca_inl(dmac + CDAL, card);
@@ -590,7 +590,7 @@ static netdev_tx_t sca_xmit(struct sk_buff *skb, struct net_device *dev)
writeb(ST_TX_EOM, &desc->stat);
port->txin = (port->txin + 1) % card->tx_ring_buffers;
- sca_outl(desc_offset(port, port->txin, 1),
+ sca_outl(hd_desc_offset(port, port->txin, 1),
get_dmac_tx(port) + EDAL, card);
sca_out(DSR_DE, DSR_TX(port->chan), card); /* Enable TX DMA */
diff --git a/fs/buffer.c b/fs/buffer.c
index 2219a76e2ca..5d0c2c6045c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,11 +41,15 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <trace/fs.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
+DEFINE_TRACE(fs_buffer_wait_start);
+DEFINE_TRACE(fs_buffer_wait_end);
+
inline void
init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
{
@@ -90,7 +94,9 @@ EXPORT_SYMBOL(unlock_buffer);
*/
void __wait_on_buffer(struct buffer_head * bh)
{
+ trace_fs_buffer_wait_start(bh);
wait_on_bit(&bh->b_state, BH_Lock, sync_buffer, TASK_UNINTERRUPTIBLE);
+ trace_fs_buffer_wait_end(bh);
}
EXPORT_SYMBOL(__wait_on_buffer);
diff --git a/fs/compat.c b/fs/compat.c
index f6fd0a00e6c..7b0389465cd 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -50,6 +50,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -1529,6 +1530,7 @@ int compat_do_execve(char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/exec.c b/fs/exec.c
index 52a447d9b6a..9a92bbe142d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -55,6 +55,7 @@
#include <linux/fs_struct.h>
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -77,6 +78,11 @@ static atomic_t call_count = ATOMIC_INIT(1);
static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);
+/*
+ * Also used in compat.c.
+ */
+DEFINE_TRACE(fs_exec);
+
int __register_binfmt(struct linux_binfmt * fmt, int insert)
{
if (!fmt)
@@ -1447,6 +1453,7 @@ int do_execve(const char * filename,
if (retval < 0)
goto out;
+ trace_fs_exec(filename);
/* execve succeeded */
current->fs->in_exec = 0;
current->in_execve = 0;
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 1eebeb72b20..a1fecf33b11 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -15,9 +15,12 @@
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/falloc.h>
+#include <trace/fs.h>
#include <asm/ioctls.h>
+DEFINE_TRACE(fs_ioctl);
+
/* So that the fiemap access checks can't overflow on 32 bit machines. */
#define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent))
@@ -616,6 +619,8 @@ SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
if (!filp)
goto out;
+ trace_fs_ioctl(fd, cmd, arg);
+
error = security_file_ioctl(filp, cmd, arg);
if (error)
goto out_fput;
diff --git a/fs/open.c b/fs/open.c
index 5a2c6ebc22b..d29d5eb3c43 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,9 +30,13 @@
#include <linux/fs_struct.h>
#include <linux/ima.h>
#include <linux/dnotify.h>
+#include <trace/fs.h>
#include "internal.h"
+DEFINE_TRACE(fs_open);
+DEFINE_TRACE(fs_close);
+
int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
struct file *filp)
{
@@ -898,6 +902,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
fsnotify_open(f);
fd_install(fd, f);
}
+ trace_fs_open(fd, tmp);
}
putname(tmp);
}
@@ -987,6 +992,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
filp = fdt->fd[fd];
if (!filp)
goto out_unlock;
+ trace_fs_close(fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8ad550..6a3f7f9c9db 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -15,6 +15,7 @@
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
+#include <trace/fs.h>
#include "read_write.h"
#include <asm/uaccess.h>
@@ -30,6 +31,15 @@ const struct file_operations generic_ro_fops = {
EXPORT_SYMBOL(generic_ro_fops);
+DEFINE_TRACE(fs_lseek);
+DEFINE_TRACE(fs_llseek);
+DEFINE_TRACE(fs_read);
+DEFINE_TRACE(fs_write);
+DEFINE_TRACE(fs_pread64);
+DEFINE_TRACE(fs_pwrite64);
+DEFINE_TRACE(fs_readv);
+DEFINE_TRACE(fs_writev);
+
static inline int unsigned_offsets(struct file *file)
{
return file->f_mode & FMODE_UNSIGNED_OFFSET;
@@ -187,6 +197,9 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
+
+ trace_fs_lseek(fd, offset, origin);
+
fput_light(file, fput_needed);
bad:
return retval;
@@ -214,6 +227,8 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
origin);
+ trace_fs_llseek(fd, offset, origin);
+
retval = (int)offset;
if (offset >= 0) {
retval = -EFAULT;
@@ -409,6 +424,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_read(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -427,6 +443,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_write(fd, buf, count, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -447,8 +464,11 @@ SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PREAD)
+ if (file->f_mode & FMODE_PREAD) {
ret = vfs_read(file, buf, count, &pos);
+ trace_fs_pread64(fd, buf, count, pos, ret);
+ }
+
fput_light(file, fput_needed);
}
@@ -476,8 +496,10 @@ SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
file = fget_light(fd, &fput_needed);
if (file) {
ret = -ESPIPE;
- if (file->f_mode & FMODE_PWRITE)
+ if (file->f_mode & FMODE_PWRITE) {
ret = vfs_write(file, buf, count, &pos);
+ trace_fs_pwrite64(fd, buf, count, pos, ret);
+ }
fput_light(file, fput_needed);
}
@@ -736,6 +758,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_readv(file, vec, vlen, &pos);
+ trace_fs_readv(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
@@ -757,6 +780,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
if (file) {
loff_t pos = file_pos_read(file);
ret = vfs_writev(file, vec, vlen, &pos);
+ trace_fs_writev(fd, vec, vlen, ret);
file_pos_write(file, pos);
fput_light(file, fput_needed);
}
diff --git a/fs/select.c b/fs/select.c
index e56560d2b08..64c2404f2cc 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -26,6 +26,7 @@
#include <linux/fs.h>
#include <linux/rcupdate.h>
#include <linux/hrtimer.h>
+#include <trace/fs.h>
#include <asm/uaccess.h>
@@ -98,6 +99,9 @@ struct poll_table_page {
#define POLL_TABLE_FULL(table) \
((unsigned long)((table)->entry+1) > PAGE_SIZE + (unsigned long)(table))
+DEFINE_TRACE(fs_select);
+DEFINE_TRACE(fs_poll);
+
/*
* Ok, Peter made a complicated, but straightforward multiple_wait() function.
* I have rewritten this, taking some shortcuts: This code may not be easy to
@@ -112,6 +116,9 @@ struct poll_table_page {
*/
static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
poll_table *p);
+static void __pollwait_exclusive(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p);
void poll_initwait(struct poll_wqueues *pwq)
{
@@ -152,6 +159,20 @@ void poll_freewait(struct poll_wqueues *pwq)
}
EXPORT_SYMBOL(poll_freewait);
+/**
+ * poll_wait_set_exclusive - set poll wait queue to exclusive
+ * @p: poll table to set up
+ *
+ * Sets up a poll wait queue to use exclusive wakeups. This is useful to
+ * wake up only one waiter at each wakeup, working around the
+ * "thundering herd" problem.
+ */
+void poll_wait_set_exclusive(poll_table *p)
+{
+ if (p)
+ init_poll_funcptr(p, __pollwait_exclusive);
+}
+EXPORT_SYMBOL(poll_wait_set_exclusive);
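
A driver opts in from its poll method, before queueing itself on the wait
queue. A minimal sketch (the device structure and data_available() helper
are hypothetical):

static unsigned int mydev_poll(struct file *file, poll_table *wait)
{
	struct mydev *dev = file->private_data;	/* hypothetical */
	unsigned int mask = 0;

	/* Wake a single waiter per event instead of the whole herd. */
	poll_wait_set_exclusive(wait);
	poll_wait(file, &dev->read_wait, wait);

	if (data_available(dev))		/* hypothetical */
		mask |= POLLIN | POLLRDNORM;
	return mask;
}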
+
static struct poll_table_entry *poll_get_entry(struct poll_wqueues *p)
{
struct poll_table_page *table = p->table;
@@ -213,8 +234,10 @@ static int pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
}
/* Add a new entry */
-static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
- poll_table *p)
+static void __pollwait_common(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p,
+ int exclusive)
{
struct poll_wqueues *pwq = container_of(p, struct poll_wqueues, pt);
struct poll_table_entry *entry = poll_get_entry(pwq);
@@ -226,7 +249,23 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
entry->key = p->key;
init_waitqueue_func_entry(&entry->wait, pollwake);
entry->wait.private = pwq;
- add_wait_queue(wait_address, &entry->wait);
+ if (!exclusive)
+ add_wait_queue(wait_address, &entry->wait);
+ else
+ add_wait_queue_exclusive(wait_address, &entry->wait);
+}
+
+static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
+ poll_table *p)
+{
+ __pollwait_common(filp, wait_address, p, 0);
+}
+
+static void __pollwait_exclusive(struct file *filp,
+ wait_queue_head_t *wait_address,
+ poll_table *p)
+{
+ __pollwait_common(filp, wait_address, p, 1);
}
int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
@@ -450,6 +489,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
file = fget_light(i, &fput_needed);
if (file) {
f_op = file->f_op;
+ trace_fs_select(i, end_time);
mask = DEFAULT_POLLMASK;
if (f_op && f_op->poll) {
wait_key_set(wait, in, out, bit);
@@ -739,6 +779,7 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait)
file = fget_light(fd, &fput_needed);
mask = POLLNVAL;
if (file != NULL) {
+ trace_fs_poll(fd);
mask = DEFAULT_POLLMASK;
if (file->f_op && file->f_op->poll) {
if (pwait)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 05d6b0e78c9..691c84baf4f 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -819,3 +819,47 @@ struct hlist_node *seq_hlist_next_rcu(void *v,
return rcu_dereference(node->next);
}
EXPORT_SYMBOL(seq_hlist_next_rcu);
+
+struct list_head *seq_sorted_list_start(struct list_head *head, loff_t *ppos)
+{
+ struct list_head *lh;
+
+ list_for_each(lh, head)
+ if ((unsigned long)lh >= *ppos) {
+ *ppos = (unsigned long)lh;
+ return lh;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(seq_sorted_list_start);
+
+struct list_head *seq_sorted_list_start_head(struct list_head *head,
+ loff_t *ppos)
+{
+ struct list_head *lh;
+
+ if (!*ppos) {
+ *ppos = (unsigned long)head;
+ return head;
+ }
+ list_for_each(lh, head)
+ if ((unsigned long)lh >= *ppos) {
+ *ppos = (unsigned long)lh->prev;
+ return lh->prev;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL(seq_sorted_list_start_head);
+
+struct list_head *seq_sorted_list_next(void *p, struct list_head *head,
+ loff_t *ppos)
+{
+ struct list_head *lh;
+ void *next;
+
+ lh = ((struct list_head *)p)->next;
+ next = (lh == head) ? NULL : lh;
+ *ppos = next ? ((unsigned long)next) : (-1UL);
+ return next;
+}
+EXPORT_SYMBOL(seq_sorted_list_next);
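
These helpers key the seq_file position on the element's address instead of
an index, so a list kept sorted by ascending address can be walked
consistently across read() calls even while entries come and go. A sketch of
seq_operations built on them (my_list and the output format are
hypothetical):

static struct list_head my_list;	/* kept sorted by ascending address */

static void *my_seq_start(struct seq_file *m, loff_t *pos)
{
	return seq_sorted_list_start_head(&my_list, pos);
}

static void *my_seq_next(struct seq_file *m, void *p, loff_t *pos)
{
	return seq_sorted_list_next(p, &my_list, pos);
}

static void my_seq_stop(struct seq_file *m, void *p)
{
}

static int my_seq_show(struct seq_file *m, void *p)
{
	if (p == &my_list)	/* start_head returned the head: header line */
		return seq_printf(m, "address\n");
	return seq_printf(m, "%p\n", p);
}

static const struct seq_operations my_seq_ops = {
	.start = my_seq_start,
	.next  = my_seq_next,
	.stop  = my_seq_stop,
	.show  = my_seq_show,
};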
diff --git a/fs/splice.c b/fs/splice.c
index 50a5d978da1..e76aac5c993 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -259,6 +259,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
return ret;
}
+EXPORT_SYMBOL_GPL(splice_to_pipe);
static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
diff --git a/include/asm-generic/trace-clock.h b/include/asm-generic/trace-clock.h
new file mode 100644
index 00000000000..138ac9b7c54
--- /dev/null
+++ b/include/asm-generic/trace-clock.h
@@ -0,0 +1,76 @@
+#ifndef _ASM_GENERIC_TRACE_CLOCK_H
+#define _ASM_GENERIC_TRACE_CLOCK_H
+
+/*
+ * include/asm-generic/trace-clock.h
+ *
+ * Copyright (C) 2007 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Generic tracing clock for architectures without TSC.
+ */
+
+#include <linux/param.h> /* For HZ */
+#include <asm/atomic.h>
+
+#define TRACE_CLOCK_SHIFT 13
+
+/*
+ * Number of hardware clock bits. The higher order bits are expected to be 0.
+ * If the hardware clock source has more than 32 bits, the bits higher than the
+ * 32nd will be truncated by a cast to a 32-bit unsigned. Range: 1 - 32.
+ * (too few bits would be unrealistic though, since we depend on the timer to
+ * detect the overflows).
+ */
+#define TC_HW_BITS 32
+
+/* Expected maximum interrupt latency in ms: 15 ms, doubled for safety */
+#define TC_EXPECTED_INTERRUPT_LATENCY 30
+
+extern atomic_long_t trace_clock_var;
+
+static inline u32 trace_clock_read32(void)
+{
+ return (u32)atomic_long_add_return(1, &trace_clock_var);
+}
+
+#ifdef CONFIG_HAVE_TRACE_CLOCK_32_TO_64
+extern u64 trace_clock_read_synthetic_tsc(void);
+extern void get_synthetic_tsc(void);
+extern void put_synthetic_tsc(void);
+
+static inline u64 trace_clock_read64(void)
+{
+ return trace_clock_read_synthetic_tsc();
+}
+#else
+static inline void get_synthetic_tsc(void)
+{
+}
+
+static inline void put_synthetic_tsc(void)
+{
+}
+
+static inline u64 trace_clock_read64(void)
+{
+ return atomic_long_add_return(1, &trace_clock_var);
+}
+#endif
+
+static inline unsigned int trace_clock_frequency(void)
+{
+ return HZ << TRACE_CLOCK_SHIFT;
+}
+
+static inline u32 trace_clock_freq_scale(void)
+{
+ return 1;
+}
+
+extern void get_trace_clock(void);
+extern void put_trace_clock(void);
+
+static inline void set_trace_clock_is_sync(int state)
+{
+}
+#endif /* _ASM_GENERIC_TRACE_CLOCK_H */
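
A quick worked example of the fallback's reported rate (assuming HZ == 250;
the actual value depends on the kernel configuration):

/* trace_clock_frequency() == HZ << TRACE_CLOCK_SHIFT
 *                         == 250 << 13 == 2048000 ticks per second,
 * the rate reported to readers of the event-counter fallback clock. */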
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index fe77e3395b4..9d4b63d7ab0 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -167,6 +167,12 @@
CPU_KEEP(exit.data) \
MEM_KEEP(init.data) \
MEM_KEEP(exit.data) \
+ . = ALIGN(128); \
+ VMLINUX_SYMBOL(__start___markers) = .; \
+ *(__markers) \
+ VMLINUX_SYMBOL(__stop___markers) = .; \
+ . = ALIGN(32); \
+ VMLINUX_SYMBOL(__start___tracepoints) = .; \
STRUCT_ALIGN(); \
*(__tracepoints) \
/* implement dynamic printk debug */ \
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index b0ada6f37dd..a63b8001b7e 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -42,6 +42,7 @@ header-y += adfs_fs.h
header-y += affs_hardblocks.h
header-y += agpgart.h
header-y += aio_abi.h
+header-y += align.h
header-y += apm_bios.h
header-y += arcfb.h
header-y += atalk.h
diff --git a/include/linux/align.h b/include/linux/align.h
new file mode 100644
index 00000000000..68bad1ac089
--- /dev/null
+++ b/include/linux/align.h
@@ -0,0 +1,66 @@
+#ifndef _LINUX_ALIGN_H
+#define _LINUX_ALIGN_H
+
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask) \
+ (((x) + (mask)) & ~(mask))
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+
+#define ALIGN(x, a) __ALIGN_KERNEL(x, a)
+#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK(x, mask)
+#define PTR_ALIGN(p, a) ((typeof(p)) ALIGN((unsigned long) (p), a))
+#define ALIGN_FLOOR(x, a) __ALIGN_FLOOR_MASK(x, (typeof(x)) (a) - 1)
+#define __ALIGN_FLOOR_MASK(x, mask) ((x) & ~(mask))
+#define PTR_ALIGN_FLOOR(p, a) \
+ ((typeof(p)) ALIGN_FLOOR((unsigned long) (p), a))
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x)) (a) - 1)) == 0)
+
+/*
+ * Align pointer on natural object alignment.
+ */
+#define object_align(obj) PTR_ALIGN(obj, __alignof__(*(obj)))
+#define object_align_floor(obj) PTR_ALIGN_FLOOR(obj, __alignof__(*(obj)))
+
+#define MAYBE_BUILD_BUG_ON(condition) \
+do { \
+ if (__builtin_constant_p(condition)) \
+ BUILD_BUG_ON(condition); \
+} while (0)
+
+/**
+ * offset_align - Calculate the offset needed to align an object on its natural
+ * alignment towards higher addresses.
+ * @align_drift: object offset from an "alignment"-aligned address.
+ * @alignment: natural object alignment. Must be non-zero, power of 2.
+ *
+ * Returns the offset that must be added to align towards higher
+ * addresses.
+ */
+#define offset_align(align_drift, alignment) \
+ ({ \
+ MAYBE_BUILD_BUG_ON((alignment) == 0 \
+ || ((alignment) & ((alignment) - 1))); \
+ (((alignment) - (align_drift)) & ((alignment) - 1)); \
+ })
+
+/**
+ * offset_align_floor - Calculate the offset needed to align an object
+ * on its natural alignment towards lower addresses.
+ * @align_drift: object offset from an "alignment"-aligned address.
+ * @alignment: natural object alignment. Must be non-zero, power of 2.
+ *
+ * Returns the offset that must be subtracted to align towards lower addresses.
+ */
+#define offset_align_floor(align_drift, alignment) \
+ ({ \
+ MAYBE_BUILD_BUG_ON((alignment) == 0 \
+ || ((alignment) & ((alignment) - 1))); \
+ (((align_drift) - (alignment)) & ((alignment) - 1)); \
+ })
+
+#endif /* __KERNEL__ */
+
+#endif
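
A few worked invocations, with the results following directly from the
definitions above (a sketch; the wrapping function is hypothetical):

#include <linux/align.h>

static void align_examples(void)
{
	char buf[64];
	char *p;

	unsigned long up    = ALIGN(13, 8);		/* 16: round up */
	unsigned long down  = ALIGN_FLOOR(13, 8);	/* 8: round down */
	unsigned long delta = offset_align(5, 8);	/* 3: 5 + 3 == 8 */

	p = PTR_ALIGN(buf + 3, 8);	/* aligned address, still a char * */
	(void)up; (void)down; (void)delta; (void)p;
}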
diff --git a/include/linux/idle.h b/include/linux/idle.h
new file mode 100644
index 00000000000..75bd2a422c8
--- /dev/null
+++ b/include/linux/idle.h
@@ -0,0 +1,19 @@
+/*
+ * include/linux/idle.h - generic idle definition
+ *
+ */
+#ifndef _LINUX_IDLE_H_
+#define _LINUX_IDLE_H_
+
+#include <linux/notifier.h>
+
+enum idle_val {
+ IDLE_START = 1,
+ IDLE_END = 2,
+};
+
+int notify_idle(enum idle_val val);
+void register_idle_notifier(struct notifier_block *n);
+void unregister_idle_notifier(struct notifier_block *n);
+
+#endif /* _LINUX_IDLE_H_ */
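
Consumers register a standard notifier_block against this chain, as the
i7300_idle driver below is converted to do. A minimal sketch:

#include <linux/idle.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

static int my_idle_event(struct notifier_block *nb, unsigned long val,
			 void *data)
{
	if (val == IDLE_START)
		pr_debug("cpu entering idle\n");
	else if (val == IDLE_END)
		pr_debug("cpu leaving idle\n");
	return NOTIFY_OK;
}

static struct notifier_block my_idle_nb = {
	.notifier_call = my_idle_event,
};

/* register_idle_notifier(&my_idle_nb) from init code,
 * unregister_idle_notifier(&my_idle_nb) on teardown. */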
diff --git a/include/linux/immediate.h b/include/linux/immediate.h
new file mode 100644
index 00000000000..0d62cab5c2e
--- /dev/null
+++ b/include/linux/immediate.h
@@ -0,0 +1,93 @@
+#ifndef _LINUX_IMMEDIATE_H
+#define _LINUX_IMMEDIATE_H
+
+/*
+ * Immediate values, can be updated at runtime and save cache lines.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * Dual BSD/GPL v2 license.
+ */
+
+#ifdef CONFIG_IMMEDIATE
+
+struct __imv {
+ unsigned long var; /* Pointer to the identifier variable of the
+ * immediate value
+ */
+ unsigned long imv; /*
+ * Pointer to the memory location of the
+ * immediate value within the instruction.
+ */
+ unsigned char size; /* Type size. */
+} __attribute__ ((packed));
+
+#include <asm/immediate.h>
+
+/**
+ * imv_set - set immediate variable (with locking)
+ * @name: immediate value name
+ * @i: required value
+ *
+ * Sets the value of @name, taking the module_mutex if required by
+ * the architecture.
+ */
+#define imv_set(name, i) \
+ do { \
+ name##__imv = (i); \
+ core_imv_update(); \
+ module_imv_update(); \
+ } while (0)
+
+/*
+ * Internal update functions.
+ */
+extern void core_imv_update(void);
+extern void imv_update_range(const struct __imv *begin,
+ const struct __imv *end);
+
+#else
+
+/*
+ * Generic immediate values: a simple, standard, memory load.
+ */
+
+/**
+ * imv_read - read immediate variable
+ * @name: immediate value name
+ *
+ * Reads the value of @name.
+ */
+#define imv_read(name) _imv_read(name)
+
+/**
+ * imv_set - set immediate variable (with locking)
+ * @name: immediate value name
+ * @i: required value
+ *
+ * Sets the value of @name, taking the module_mutex if required by
+ * the architecture.
+ */
+#define imv_set(name, i) (name##__imv = (i))
+
+static inline void core_imv_update(void) { }
+static inline void module_imv_update(void) { }
+
+#endif
+
+#define DECLARE_IMV(type, name) extern __typeof__(type) name##__imv
+#define DEFINE_IMV(type, name) __typeof__(type) name##__imv
+
+#define EXPORT_IMV_SYMBOL(name) EXPORT_SYMBOL(name##__imv)
+#define EXPORT_IMV_SYMBOL_GPL(name) EXPORT_SYMBOL_GPL(name##__imv)
+
+/**
+ * _imv_read - Read immediate value with standard memory load.
+ * @name: immediate value name
+ *
+ * Forces a plain memory read of the immediate value rather than the
+ * instruction-based mechanism. Useful for data reads from __init and __exit
+ * section code.
+ */
+#define _imv_read(name) (name##__imv)
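+
+/*
+ * Usage sketch (tracing_on and do_trace are hypothetical names). imv_read()
+ * is the fast-path read, patched in-line when CONFIG_IMMEDIATE is available;
+ * imv_set() is the slow-path update of every load site:
+ *
+ *	DEFINE_IMV(char, tracing_on);
+ *
+ *	if (imv_read(tracing_on))
+ *		do_trace();
+ *
+ *	imv_set(tracing_on, 1);
+ */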
+
+#endif
diff --git a/include/linux/irqnr.h b/include/linux/irqnr.h
index 3bc4dcab6e8..3f8c992934a 100644
--- a/include/linux/irqnr.h
+++ b/include/linux/irqnr.h
@@ -24,6 +24,7 @@
#else /* CONFIG_GENERIC_HARDIRQS */
extern int nr_irqs;
+struct irq_desc;
extern struct irq_desc *irq_to_desc(unsigned int irq);
unsigned int irq_get_next_irq(unsigned int offset);
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2fe6e84894a..eff75504cf1 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -4,8 +4,8 @@
/*
* 'kernel.h' contains some often-used function prototypes etc
*/
-#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
-#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+
+#include <linux/align.h>
#ifdef __KERNEL__
@@ -37,11 +37,6 @@
#define STACK_MAGIC 0xdeadbeef
-#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
-#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
-#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
-
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db2185..914f0196130 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/preempt.h>
+#include <linux/marker.h>
#include <linux/msi.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>
diff --git a/include/linux/ltt-channels.h b/include/linux/ltt-channels.h
new file mode 100644
index 00000000000..d8d368d5440
--- /dev/null
+++ b/include/linux/ltt-channels.h
@@ -0,0 +1,108 @@
+#ifndef _LTT_CHANNELS_H
+#define _LTT_CHANNELS_H
+
+/*
+ * Copyright (C) 2008 Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Dynamic tracer channel allocation.
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <linux/limits.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+#include <linux/ltt-core.h>
+
+#define EVENTS_PER_CHANNEL 65536
+
+/*
+ * Forward declaration of locking-specific per-cpu buffer structure.
+ */
+struct ltt_chanbuf;
+struct ltt_trace;
+struct ltt_serialize_closure;
+struct ltt_probe_private_data;
+
+/* Serialization callback '%k' */
+typedef size_t (*ltt_serialize_cb)(struct ltt_chanbuf *buf, size_t buf_offset,
+ struct ltt_serialize_closure *closure,
+ void *serialize_private,
+ unsigned int stack_pos_ctx,
+ int *largest_align,
+ const char *fmt, va_list *args);
+
+struct ltt_probe_private_data {
+ struct ltt_trace *trace; /*
+ * Target trace, for metadata
+ * or statedump.
+ */
+ ltt_serialize_cb serializer; /*
+ * Serialization function override.
+ */
+ void *serialize_private; /*
+ * Private data for serialization
+ * functions.
+ */
+};
+
+struct ltt_chan_alloc {
+ unsigned long buf_size; /* Size of the buffer */
+ unsigned long sb_size; /* Sub-buffer size */
+ unsigned int sb_size_order; /* Order of sub-buffer size */
+ unsigned int n_sb_order; /* Order of the number of sub-buffers */
+ int extra_reader_sb:1; /* Bool: has extra reader subbuffer */
+ struct ltt_chanbuf *buf; /* Channel per-cpu buffers */
+
+ struct kref kref; /* Reference count */
+ unsigned long n_sb; /* Number of sub-buffers */
+ struct dentry *parent; /* Associated parent dentry */
+ struct dentry *ascii_dentry; /* Text output dentry */
+ struct ltt_trace *trace; /* Associated trace */
+ char filename[NAME_MAX]; /* Filename for channel files */
+};
+
+struct ltt_chan {
+ struct ltt_chan_alloc a; /* Parent. First field. */
+ int overwrite:1;
+ int active:1;
+ unsigned long commit_count_mask; /*
+ * Commit count mask, removing
+ * the MSBs corresponding to
+ * bits used to represent the
+ * subbuffer index.
+ */
+ unsigned long switch_timer_interval;
+};
+
+struct ltt_channel_setting {
+ unsigned int sb_size;
+ unsigned int n_sb;
+ struct kref kref; /* Number of references to structure content */
+ struct list_head list;
+ unsigned int index; /* index of channel in trace channel array */
+ u16 free_event_id; /* Next event ID to allocate */
+ char name[PATH_MAX];
+};
+
+int ltt_channels_register(const char *name);
+int ltt_channels_unregister(const char *name, int compacting);
+int ltt_channels_set_default(const char *name,
+ unsigned int subbuf_size,
+ unsigned int subbuf_cnt);
+const char *ltt_channels_get_name_from_index(unsigned int index);
+int ltt_channels_get_index_from_name(const char *name);
+int ltt_channels_trace_ref(void);
+struct ltt_chan *ltt_channels_trace_alloc(unsigned int *nr_channels,
+ int overwrite, int active);
+void ltt_channels_trace_free(struct ltt_chan *channels,
+ unsigned int nr_channels);
+void ltt_channels_trace_set_timer(struct ltt_chan *chan,
+ unsigned long interval);
+
+int _ltt_channels_get_event_id(const char *channel, const char *name);
+int ltt_channels_get_event_id(const char *channel, const char *name);
+void _ltt_channels_reset_event_ids(void);
+
+#endif /* _LTT_CHANNELS_H */
diff --git a/include/linux/ltt-core.h b/include/linux/ltt-core.h
new file mode 100644
index 00000000000..acb696ed106
--- /dev/null
+++ b/include/linux/ltt-core.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2005-2010 Mathieu Desnoyers (mathieu.desnoyers@efficios.com)
+ *
+ * This contains the core definitions for the Linux Trace Toolkit.
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#ifndef LTT_CORE_H
+#define LTT_CORE_H
+
+/* Keep track of trap nesting inside LTT */
+DECLARE_PER_CPU(unsigned int, ltt_nesting);
+
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+
+/*
+ * Calculate the offset needed to align the type.
+ * size_of_type must be non-zero.
+ */
+static inline unsigned int ltt_align(size_t align_drift, size_t size_of_type)
+{
+ return offset_align(align_drift, min(sizeof(void *), size_of_type));
+}
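+
+/*
+ * Example (illustrative): ltt_align(6, 4) == (4 - 6) & 3 == 2, i.e. two
+ * padding bytes bring a 4-byte field at offset 6 up to the aligned offset 8.
+ */
+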
+/* Default arch alignment */
+#define LTT_ALIGN
+
+static inline int ltt_get_alignment(void)
+{
+ return sizeof(void *);
+}
+
+extern unsigned int ltt_fmt_largest_align(size_t align_drift, const char *fmt);
+
+#else
+
+static inline unsigned int ltt_align(size_t align_drift,
+ size_t size_of_type)
+{
+ return 0;
+}
+
+#define LTT_ALIGN __attribute__((packed))
+
+static inline int ltt_get_alignment(void)
+{
+ return 0;
+}
+
+static inline unsigned int ltt_fmt_largest_align(size_t align_drift,
+ const char *fmt)
+{
+ return 0;
+}
+
+#endif /* HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#endif /* LTT_CORE_H */
diff --git a/include/linux/marker.h b/include/linux/marker.h
new file mode 100644
index 00000000000..c50c66d09f0
--- /dev/null
+++ b/include/linux/marker.h
@@ -0,0 +1,273 @@
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/immediate.h>
+
+struct module;
+struct marker;
+struct marker_probe_array;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @mdata: marker data
+ * @probe_private: probe private data
+ * @call_private: call site private data
+ * @fmt: format string
+ * @args: variable argument list pointer. A pointer is used because C
+ * provides no portable way to hand a va_list on to the callee by value.
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(const struct marker *mdata,
+ void *probe_private, void *call_private,
+ const char *fmt, va_list *args);
+
+struct marker_probe_closure {
+ marker_probe_func *func; /* Callback */
+ void *probe_private; /* Private probe data */
+};
+
+struct marker {
+ const char *channel; /* Name of channel where to send data */
+ const char *name; /* Marker name */
+ const char *format; /* Marker format string, describing the
+ * variable argument list.
+ */
+ DEFINE_IMV(char, state);/* Immediate value state. */
+ char ptype; /* probe type: 0: single, 1: multi */
+ /* Probe wrapper */
+ u16 channel_id; /* Numeric channel identifier, dynamic */
+ u16 event_id; /* Numeric event identifier, dynamic */
+ void (*call)(const struct marker *mdata, void *call_private, ...);
+ struct marker_probe_closure single;
+ struct marker_probe_array *multi;
+ const char *tp_name; /* Optional tracepoint name */
+ void *tp_cb; /* Optional tracepoint callback */
+} __attribute__((aligned(128))); /*
+ * Aligned on 128 bytes because it is
+ * globally visible and gcc happily
+ * aligns these on the structure size.
+ * Keep in sync with vmlinux.lds.h.
+ */
+
+#ifdef CONFIG_MARKERS
+
+#define _DEFINE_MARKER(channel, name, tp_name_str, tp_cb, format) \
+ static const char __mstrtab_##channel##_##name[] \
+ __attribute__((section("__markers_strings"))) \
+ = #channel "\0" #name "\0" format; \
+ static struct marker __mark_##channel##_##name \
+ __attribute__((section("__markers"), aligned(128))) = \
+ { __mstrtab_##channel##_##name, \
+ &__mstrtab_##channel##_##name[sizeof(#channel)], \
+ &__mstrtab_##channel##_##name[sizeof(#channel) + \
+ sizeof(#name)], \
+ 0, 0, 0, 0, marker_probe_cb, \
+ { __mark_empty_function, NULL}, \
+ NULL, tp_name_str, tp_cb }
+
+#define DEFINE_MARKER(channel, name, format) \
+ _DEFINE_MARKER(channel, name, NULL, NULL, format)
+
+#define DEFINE_MARKER_TP(channel, name, tp_name, tp_cb, format) \
+ _DEFINE_MARKER(channel, name, #tp_name, tp_cb, format)
+
+/*
+ * Make sure the alignment of the structure in the __markers section will
+ * not add unwanted padding between the beginning of the section and the
+ * structure. Force alignment to the same alignment as the section start.
+ *
+ * The "generic" argument controls which marker enabling mechanism must be used.
+ * If generic is true, a variable read is used.
+ * If generic is false, immediate values are used.
+ */
+#define __trace_mark(generic, channel, name, call_private, format, args...) \
+ do { \
+ DEFINE_MARKER(channel, name, format); \
+ __mark_check_format(format, ## args); \
+ if (!generic) { \
+ if (unlikely(imv_read( \
+ __mark_##channel##_##name.state))) \
+ (*__mark_##channel##_##name.call) \
+ (&__mark_##channel##_##name, \
+ call_private, ## args); \
+ } else { \
+ if (unlikely(_imv_read( \
+ __mark_##channel##_##name.state))) \
+ (*__mark_##channel##_##name.call) \
+ (&__mark_##channel##_##name, \
+ call_private, ## args); \
+ } \
+ } while (0)
+
+#define __trace_mark_tp(channel, name, call_private, tp_name, tp_cb, \
+ format, args...) \
+ do { \
+ void __check_tp_type(void) \
+ { \
+ register_trace_##tp_name(tp_cb, NULL); \
+ } \
+ DEFINE_MARKER_TP(channel, name, tp_name, tp_cb, format);\
+ __mark_check_format(format, ## args); \
+ (*__mark_##channel##_##name.call)(&__mark_##channel##_##name, \
+ call_private, ## args); \
+ } while (0)
+
+extern void marker_update_probe_range(struct marker *begin,
+ struct marker *end);
+
+#define GET_MARKER(channel, name) (__mark_##channel##_##name)
+
+#else /* !CONFIG_MARKERS */
+#define DEFINE_MARKER(channel, name, format)
+#define DEFINE_MARKER_TP(channel, name, tp_name, tp_cb, format)
+#define __trace_mark(generic, channel, name, call_private, format, args...) \
+ __mark_check_format(format, ## args)
+#define __trace_mark_tp(channel, name, call_private, tp_name, tp_cb, \
+ format, args...) \
+ do { \
+ void __check_tp_type(void) \
+ { \
+ register_trace_##tp_name(tp_cb, NULL); \
+ } \
+ __mark_check_format(format, ## args); \
+ } while (0)
+static inline void marker_update_probe_range(struct marker *begin,
+ struct marker *end)
+{ }
+#define GET_MARKER(channel, name)
+#endif /* CONFIG_MARKERS */
+
+/**
+ * trace_mark - Marker using code patching
+ * @channel: marker channel (where to send the data), not quoted.
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using optimized code patching technique (imv_read())
+ * to be enabled when immediate values are present.
+ */
+#define trace_mark(channel, name, format, args...) \
+ __trace_mark(0, channel, name, NULL, format, ## args)
+
+/**
+ * _trace_mark - Marker using variable read
+ * @channel: marker channel (where to send the data), not quoted.
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker using a standard memory read (_imv_read()) to be
+ * enabled. Should be used for markers in code paths where instruction
+ * modification based enabling is not welcome. (__init and __exit functions,
+ * lockdep, some traps, printk).
+ */
+#define _trace_mark(channel, name, format, args...) \
+ __trace_mark(1, channel, name, NULL, format, ## args)
+
+/**
+ * trace_mark_tp - Marker in a tracepoint callback
+ * @channel: marker channel (where to send the data), not quoted.
+ * @name: marker name, not quoted.
+ * @tp_name: tracepoint name, not quoted.
+ * @tp_cb: tracepoint callback. Should have an associated global symbol so it
+ * is not optimized away by the compiler (should not be static).
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker in a tracepoint callback.
+ */
+#define trace_mark_tp(channel, name, tp_name, tp_cb, format, args...) \
+ __trace_mark_tp(channel, name, NULL, tp_name, tp_cb, format, ## args)
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
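+
+/*
+ * Usage sketch (channel and event names below are illustrative only):
+ *
+ *	trace_mark(subsystem, my_event, "value %d name %s", val, name);
+ *	trace_mark(subsystem, my_empty_event, MARK_NOARGS);
+ */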
+
+extern void lock_markers(void);
+extern void unlock_markers(void);
+
+extern void markers_compact_event_ids(void);
+
+/* To be used for string format validity checking with gcc */
+static inline void __printf(1, 2) ___mark_check_format(const char *fmt, ...)
+{
+}
+
+#define __mark_check_format(format, args...) \
+ do { \
+ if (0) \
+ ___mark_check_format(format, ## args); \
+ } while (0)
+
+extern marker_probe_func __mark_empty_function;
+
+extern void marker_probe_cb(const struct marker *mdata,
+ void *call_private, ...);
+
+/*
+ * Connect a probe to a marker.
+ * private data pointer must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *channel, const char *name,
+ const char *format, marker_probe_func *probe, void *probe_private);
+
+/*
+ * Returns the private data given to marker_probe_register.
+ */
+extern int marker_probe_unregister(const char *channel, const char *name,
+ marker_probe_func *probe, void *probe_private);
+/*
+ * Unregister a marker by providing the registered private data.
+ */
+extern int marker_probe_unregister_private_data(marker_probe_func *probe,
+ void *probe_private);
+
+extern void *marker_get_private_data(const char *channel, const char *name,
+ marker_probe_func *probe, int num);
+
+const char *marker_get_name_from_id(u16 channel_id, u16 event_id);
+const char *marker_get_fmt_from_id(u16 channel_id, u16 event_id);
+
+/*
+ * marker_synchronize_unregister must be called between the last marker probe
+ * unregistration and the earlier of:
+ * - the end of the module exit function,
+ * - the freeing of any resource used by the probes,
+ * to ensure the code and data remain valid for any probe that may still be
+ * running.
+ */
+#define marker_synchronize_unregister() synchronize_sched()
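+
+/*
+ * Example teardown ordering (sketch; my_probe and my_data are hypothetical):
+ *
+ *	marker_probe_unregister("chan", "event", my_probe, my_data);
+ *	marker_synchronize_unregister();
+ *	kfree(my_data);
+ *
+ * Freeing my_data is only safe after the synchronization point above.
+ */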
+
+struct marker_iter {
+ struct module *module;
+ struct marker *marker;
+};
+
+extern void marker_iter_start(struct marker_iter *iter);
+extern void marker_iter_next(struct marker_iter *iter);
+extern void marker_iter_stop(struct marker_iter *iter);
+extern void marker_iter_reset(struct marker_iter *iter);
+extern int marker_get_iter_range(struct marker **marker, struct marker *begin,
+ struct marker *end);
+extern int _is_marker_enabled(const char *channel, const char *name);
+extern int is_marker_enabled(const char *channel, const char *name);
+extern int is_marker_present(const char *channel, const char *name);
+extern void marker_update_probes(void);
+
+#endif
diff --git a/include/linux/module.h b/include/linux/module.h
index 5de42043dff..35c96eed6dc 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -15,6 +15,7 @@
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
+#include <linux/marker.h>
#include <linux/tracepoint.h>
#include <linux/percpu.h>
@@ -376,6 +377,10 @@ struct module
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args;
+#ifdef CONFIG_MARKERS
+ struct marker *markers;
+ unsigned int num_markers;
+#endif
#ifdef CONFIG_TRACEPOINTS
struct tracepoint * const *tracepoints_ptrs;
unsigned int num_tracepoints;
@@ -574,6 +579,10 @@ int register_module_notifier(struct notifier_block * nb);
int unregister_module_notifier(struct notifier_block * nb);
extern void print_modules(void);
+extern void list_modules(void *call_data);
+
+extern void module_update_markers(void);
+extern int module_get_iter_markers(struct marker_iter *iter);
extern void module_update_tracepoints(void);
extern int module_get_iter_tracepoints(struct tracepoint_iter *iter);
@@ -694,6 +703,14 @@ static inline void print_modules(void)
{
}
+static inline void list_modules(void *call_data)
+{
+}
+
+static inline void module_update_markers(void)
+{
+}
+
static inline void module_update_tracepoints(void)
{
}
@@ -702,6 +719,12 @@ static inline int module_get_iter_tracepoints(struct tracepoint_iter *iter)
{
return 0;
}
+
+static inline int module_get_iter_markers(struct marker_iter *iter)
+{
+ return 0;
+}
+
#endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d971346b034..f97738da204 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -44,6 +44,7 @@
#include <linux/rculist.h>
#include <linux/dmaengine.h>
#include <linux/workqueue.h>
+#include <trace/net.h>
#include <linux/ethtool.h>
#include <net/net_namespace.h>
diff --git a/include/linux/poll.h b/include/linux/poll.h
index 1a2ccd6f382..09e1375d768 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -81,6 +81,8 @@ static inline int poll_schedule(struct poll_wqueues *pwq, int state)
return poll_schedule_timeout(pwq, state, NULL, 0);
}
+extern void poll_wait_set_exclusive(poll_table *p);
+
/*
* Scaleable version of the fd_set.
*/
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 2dea94fc440..ad33cb475d0 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -241,6 +241,16 @@ static inline void list_splice_init_rcu(struct list_head *list,
#define list_first_entry_rcu(ptr, type, member) \
list_entry_rcu((ptr)->next, type, member)
+#define __list_for_each_rcu(pos, head) \
+ for (pos = rcu_dereference_raw(list_next_rcu(head)); \
+ pos != (head); \
+ pos = rcu_dereference_raw(list_next_rcu((pos))))
+
+#define __list_for_each_entry_rcu(pos, head, member) \
+ for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
+
/**
* list_for_each_entry_rcu - iterate over rcu list of given type
* @pos: the type * to use as a loop cursor.
@@ -288,6 +298,23 @@ static inline void list_splice_init_rcu(struct list_head *list,
pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
/**
+ * list_for_each_entry_continue_rcu - continue iteration over typed rcu list
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_struct within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as list_add_rcu()
+ * as long as the traversal is guarded by rcu_read_lock().
+ * It continues an iteration initiated by list_for_each_entry_rcu().
+ */
+#define list_for_each_entry_continue_rcu(pos, head, member) \
+ for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \
+ prefetch(pos->member.next), &pos->member != (head); \
+ pos = list_entry_rcu(pos->member.next, typeof(*pos), member))
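+
+/*
+ * Sketch (stop_here() and process() are hypothetical): resume a walk after
+ * breaking out of an initial list_for_each_entry_rcu() iteration.
+ *
+ *	rcu_read_lock();
+ *	list_for_each_entry_rcu(pos, head, member)
+ *		if (stop_here(pos))
+ *			break;
+ *	list_for_each_entry_continue_rcu(pos, head, member)
+ *		process(pos);
+ *	rcu_read_unlock();
+ */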
+
+
+/**
* hlist_del_rcu - deletes entry from hash list without re-initialization
* @n: the element to delete from the hash list.
*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 777d8a5ed06..d6cde3a56cc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2620,6 +2620,9 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}
+extern void clear_kernel_trace_flag_all_tasks(void);
+extern void set_kernel_trace_flag_all_tasks(void);
+
#endif /* __KERNEL__ */
#endif
diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h
index 03c0232b416..34f8680a7b9 100644
--- a/include/linux/seq_file.h
+++ b/include/linux/seq_file.h
@@ -153,4 +153,25 @@ extern struct hlist_node *seq_hlist_start_head_rcu(struct hlist_head *head,
extern struct hlist_node *seq_hlist_next_rcu(void *v,
struct hlist_head *head,
loff_t *ppos);
+
+/*
+ * Helpers for iteration over a list sorted by ascending head pointer address.
+ * To be used in contexts where preemption cannot be disabled, to ensure that
+ * iteration on a modified list resumes at the same location where it stopped,
+ * or at a following location. It ensures that the only information lost is
+ * for elements added to or removed from the list between iterations.
+ * void *pos is only used to get the next list element and may not be a valid
+ * list_head anymore when given to seq_sorted_list_start() or
+ * seq_sorted_list_start_head().
+ */
+extern struct list_head *seq_sorted_list_start(struct list_head *head,
+ loff_t *ppos);
+extern struct list_head *seq_sorted_list_start_head(struct list_head *head,
+ loff_t *ppos);
+/*
+ * seq_sorted_list_next() must be called with an existing node p.
+ */
+extern struct list_head *seq_sorted_list_next(void *p, struct list_head *head,
+ loff_t *ppos);
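+
+/*
+ * Wiring sketch (my_list and the op names are hypothetical):
+ *
+ *	static void *s_start(struct seq_file *m, loff_t *pos)
+ *	{
+ *		return seq_sorted_list_start(&my_list, pos);
+ *	}
+ *
+ *	static void *s_next(struct seq_file *m, void *p, loff_t *pos)
+ *	{
+ *		return seq_sorted_list_next(p, &my_list, pos);
+ *	}
+ */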
+
#endif
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 4d559325d91..3d2cd9b993c 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -341,6 +341,7 @@ extern int swap_type_of(dev_t, sector_t, struct block_device **);
extern unsigned int count_swap_pages(int, int);
extern sector_t map_swap_page(struct page *, struct block_device **);
extern sector_t swapdev_block(int, pgoff_t);
+extern struct swap_info_struct *get_swap_info_struct(unsigned);
extern int reuse_swap_page(struct page *);
extern int try_to_free_swap(struct page *);
struct backing_dev_info;
@@ -384,6 +385,8 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
}
#endif
+extern void ltt_dump_swap_files(void *call_data);
+
#else /* CONFIG_SWAP */
#define nr_swap_pages 0L
@@ -508,6 +511,10 @@ mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep)
}
#endif
+static inline void ltt_dump_swap_files(void *call_data)
+{
+}
+
#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cd42e30b7c6..436a327d803 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -76,6 +76,14 @@ static inline pte_t swp_entry_to_pte(swp_entry_t entry)
return __swp_entry_to_pte(arch_entry);
}
+static inline swp_entry_t page_swp_entry(struct page *page)
+{
+ swp_entry_t entry;
+ VM_BUG_ON(!PageSwapCache(page));
+ entry.val = page_private(page);
+ return entry;
+}
+
#ifdef CONFIG_MIGRATION
static inline swp_entry_t make_migration_entry(struct page *page, int write)
{
diff --git a/include/linux/time.h b/include/linux/time.h
index 1e6d3b59238..8ae676f1e7c 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -292,6 +292,8 @@ struct itimerval {
#define CLOCK_MONOTONIC_RAW 4
#define CLOCK_REALTIME_COARSE 5
#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_TRACE_FREQ 14
+#define CLOCK_TRACE 15
/*
* The IDs of various hardware clocks:
diff --git a/include/linux/trace-clock.h b/include/linux/trace-clock.h
new file mode 100644
index 00000000000..273991a9638
--- /dev/null
+++ b/include/linux/trace-clock.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_TRACE_CLOCK_H
+#define _LINUX_TRACE_CLOCK_H
+
+/*
+ * Trace clock
+ *
+ * Chooses between an architecture specific clock or an atomic logical clock.
+ *
+ * Copyright (C) 2007,2008 Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ */
+
+#ifdef CONFIG_HAVE_TRACE_CLOCK
+#include <asm/trace-clock.h>
+#else
+#include <asm-generic/trace-clock.h>
+#endif /* CONFIG_HAVE_TRACE_CLOCK */
+#endif /* _LINUX_TRACE_CLOCK_H */
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 425bcfe56c6..f3b763efe11 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -1,8 +1,8 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM timer
-#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_TIMER_H
+#if !defined(_TRACE_EVENTS_TIMER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EVENTS_TIMER_H
#include <linux/tracepoint.h>
#include <linux/hrtimer.h>
@@ -323,7 +323,7 @@ TRACE_EVENT(itimer_expire,
(int) __entry->pid, (unsigned long long)__entry->now)
);
-#endif /* _TRACE_TIMER_H */
+#endif /* _TRACE_EVENTS_TIMER_H */
/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/include/trace/fault.h b/include/trace/fault.h
new file mode 100644
index 00000000000..3277e303fc4
--- /dev/null
+++ b/include/trace/fault.h
@@ -0,0 +1,25 @@
+#ifndef _TRACE_FAULT_H
+#define _TRACE_FAULT_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(page_fault_entry,
+ TP_PROTO(struct pt_regs *regs, int trapnr,
+ struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access),
+ TP_ARGS(regs, trapnr, mm, vma, address, write_access));
+DECLARE_TRACE(page_fault_exit,
+ TP_PROTO(int res),
+ TP_ARGS(res));
+DECLARE_TRACE(page_fault_nosem_entry,
+ TP_PROTO(struct pt_regs *regs, int trapnr, unsigned long address),
+ TP_ARGS(regs, trapnr, address));
+DECLARE_TRACE_NOARGS(page_fault_nosem_exit);
+DECLARE_TRACE(page_fault_get_user_entry,
+ TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, int write_access),
+ TP_ARGS(mm, vma, address, write_access));
+DECLARE_TRACE(page_fault_get_user_exit,
+ TP_PROTO(int res),
+ TP_ARGS(res));
+#endif
diff --git a/include/trace/filemap.h b/include/trace/filemap.h
new file mode 100644
index 00000000000..14e90ba9a09
--- /dev/null
+++ b/include/trace/filemap.h
@@ -0,0 +1,19 @@
+#ifndef _TRACE_FILEMAP_H
+#define _TRACE_FILEMAP_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(wait_on_page_start,
+ TP_PROTO(struct page *page, int bit_nr),
+ TP_ARGS(page, bit_nr));
+DECLARE_TRACE(wait_on_page_end,
+ TP_PROTO(struct page *page, int bit_nr),
+ TP_ARGS(page, bit_nr));
+DECLARE_TRACE(add_to_page_cache,
+ TP_PROTO(struct address_space *mapping, pgoff_t offset),
+ TP_ARGS(mapping, offset));
+DECLARE_TRACE(remove_from_page_cache,
+ TP_PROTO(struct address_space *mapping),
+ TP_ARGS(mapping));
+
+#endif
diff --git a/include/trace/fs.h b/include/trace/fs.h
new file mode 100644
index 00000000000..efe7e477dc1
--- /dev/null
+++ b/include/trace/fs.h
@@ -0,0 +1,66 @@
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+
+#include <linux/buffer_head.h>
+#include <linux/time.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(fs_buffer_wait_start,
+ TP_PROTO(struct buffer_head *bh),
+ TP_ARGS(bh));
+DECLARE_TRACE(fs_buffer_wait_end,
+ TP_PROTO(struct buffer_head *bh),
+ TP_ARGS(bh));
+DECLARE_TRACE(fs_exec,
+ TP_PROTO(const char *filename),
+ TP_ARGS(filename));
+DECLARE_TRACE(fs_ioctl,
+ TP_PROTO(unsigned int fd, unsigned int cmd, unsigned long arg),
+ TP_ARGS(fd, cmd, arg));
+DECLARE_TRACE(fs_open,
+ TP_PROTO(int fd, char *filename),
+ TP_ARGS(fd, filename));
+DECLARE_TRACE(fs_close,
+ TP_PROTO(unsigned int fd),
+ TP_ARGS(fd));
+DECLARE_TRACE(fs_lseek,
+ TP_PROTO(unsigned int fd, long offset, unsigned int origin),
+ TP_ARGS(fd, offset, origin));
+DECLARE_TRACE(fs_llseek,
+ TP_PROTO(unsigned int fd, loff_t offset, unsigned int origin),
+ TP_ARGS(fd, offset, origin));
+
+/*
+ * Probes must be aware that __user * may be modified by concurrent userspace
+ * or kernel threads.
+ */
+DECLARE_TRACE(fs_read,
+ TP_PROTO(unsigned int fd, char __user *buf, size_t count, ssize_t ret),
+ TP_ARGS(fd, buf, count, ret));
+DECLARE_TRACE(fs_write,
+ TP_PROTO(unsigned int fd, const char __user *buf, size_t count,
+ ssize_t ret),
+ TP_ARGS(fd, buf, count, ret));
+DECLARE_TRACE(fs_pread64,
+ TP_PROTO(unsigned int fd, char __user *buf, size_t count, loff_t pos,
+ ssize_t ret),
+ TP_ARGS(fd, buf, count, pos, ret));
+DECLARE_TRACE(fs_pwrite64,
+ TP_PROTO(unsigned int fd, const char __user *buf, size_t count,
+ loff_t pos, ssize_t ret),
+ TP_ARGS(fd, buf, count, pos, ret));
+DECLARE_TRACE(fs_readv,
+ TP_PROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TP_ARGS(fd, vec, vlen, ret));
+DECLARE_TRACE(fs_writev,
+ TP_PROTO(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, ssize_t ret),
+ TP_ARGS(fd, vec, vlen, ret));
+DECLARE_TRACE(fs_select,
+ TP_PROTO(int fd, struct timespec *end_time),
+ TP_ARGS(fd, end_time));
+DECLARE_TRACE(fs_poll,
+ TP_PROTO(int fd),
+ TP_ARGS(fd));
+#endif
diff --git a/include/trace/hugetlb.h b/include/trace/hugetlb.h
new file mode 100644
index 00000000000..c18944e3401
--- /dev/null
+++ b/include/trace/hugetlb.h
@@ -0,0 +1,28 @@
+#ifndef _TRACE_HUGETLB_H
+#define _TRACE_HUGETLB_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(hugetlb_page_release,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(hugetlb_page_grab,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(hugetlb_buddy_pgalloc,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(hugetlb_page_alloc,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(hugetlb_page_free,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(hugetlb_pages_reserve,
+ TP_PROTO(struct inode *inode, long from, long to, int ret),
+ TP_ARGS(inode, from, to, ret));
+DECLARE_TRACE(hugetlb_pages_unreserve,
+ TP_PROTO(struct inode *inode, long offset, long freed),
+ TP_ARGS(inode, offset, freed));
+
+#endif
diff --git a/include/trace/ipc.h b/include/trace/ipc.h
new file mode 100644
index 00000000000..ea9dac190c3
--- /dev/null
+++ b/include/trace/ipc.h
@@ -0,0 +1,18 @@
+#ifndef _TRACE_IPC_H
+#define _TRACE_IPC_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(ipc_msg_create,
+ TP_PROTO(long id, int flags),
+ TP_ARGS(id, flags));
+DECLARE_TRACE(ipc_sem_create,
+ TP_PROTO(long id, int flags),
+ TP_ARGS(id, flags));
+DECLARE_TRACE(ipc_shm_create,
+ TP_PROTO(long id, int flags),
+ TP_ARGS(id, flags));
+DECLARE_TRACE(ipc_call,
+ TP_PROTO(unsigned int call, unsigned int first),
+ TP_ARGS(call, first));
+#endif
diff --git a/include/trace/ipv4.h b/include/trace/ipv4.h
new file mode 100644
index 00000000000..388908a788e
--- /dev/null
+++ b/include/trace/ipv4.h
@@ -0,0 +1,14 @@
+#ifndef _TRACE_IPV4_H
+#define _TRACE_IPV4_H
+
+#include <linux/inetdevice.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(ipv4_addr_add,
+ TP_PROTO(struct in_ifaddr *ifa),
+ TP_ARGS(ifa));
+DECLARE_TRACE(ipv4_addr_del,
+ TP_PROTO(struct in_ifaddr *ifa),
+ TP_ARGS(ifa));
+
+#endif
diff --git a/include/trace/ipv6.h b/include/trace/ipv6.h
new file mode 100644
index 00000000000..ffb9b113048
--- /dev/null
+++ b/include/trace/ipv6.h
@@ -0,0 +1,14 @@
+#ifndef _TRACE_IPV6_H
+#define _TRACE_IPV6_H
+
+#include <net/if_inet6.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(ipv6_addr_add,
+ TP_PROTO(struct inet6_ifaddr *ifa),
+ TP_ARGS(ifa));
+DECLARE_TRACE(ipv6_addr_del,
+ TP_PROTO(struct inet6_ifaddr *ifa),
+ TP_ARGS(ifa));
+
+#endif
diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 00000000000..58d50636936
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,31 @@
+#ifndef _LTTNG_TRACE_IRQ_H
+#define _LTTNG_TRACE_IRQ_H
+
+#include <linux/kdebug.h>
+#include <linux/interrupt.h>
+
+/*
+ * action can be NULL if not available.
+ */
+DECLARE_TRACE(irq_entry,
+ TP_PROTO(unsigned int id, struct pt_regs *regs,
+ struct irqaction *action),
+ TP_ARGS(id, regs, action));
+DECLARE_TRACE(irq_exit,
+ TP_PROTO(irqreturn_t retval),
+ TP_ARGS(retval));
+
+DECLARE_TRACE(irq_tasklet_low_entry,
+ TP_PROTO(struct tasklet_struct *t),
+ TP_ARGS(t));
+DECLARE_TRACE(irq_tasklet_low_exit,
+ TP_PROTO(struct tasklet_struct *t),
+ TP_ARGS(t));
+DECLARE_TRACE(irq_tasklet_high_entry,
+ TP_PROTO(struct tasklet_struct *t),
+ TP_ARGS(t));
+DECLARE_TRACE(irq_tasklet_high_exit,
+ TP_PROTO(struct tasklet_struct *t),
+ TP_ARGS(t));
+
+#endif
diff --git a/include/trace/kernel.h b/include/trace/kernel.h
new file mode 100644
index 00000000000..ca61c54525b
--- /dev/null
+++ b/include/trace/kernel.h
@@ -0,0 +1,31 @@
+#ifndef _TRACE_KERNEL_H
+#define _TRACE_KERNEL_H
+
+#include <linux/tracepoint.h>
+#include <linux/kexec.h>
+
+struct kimage;
+
+DECLARE_TRACE(kernel_printk,
+ TP_PROTO(unsigned long retaddr),
+ TP_ARGS(retaddr));
+DECLARE_TRACE(kernel_vprintk,
+ TP_PROTO(unsigned long retaddr, char *buf, int len),
+ TP_ARGS(retaddr, buf, len));
+DECLARE_TRACE(kernel_module_free,
+ TP_PROTO(struct module *mod),
+ TP_ARGS(mod));
+DECLARE_TRACE(kernel_module_load,
+ TP_PROTO(struct module *mod),
+ TP_ARGS(mod));
+DECLARE_TRACE(kernel_panic,
+ TP_PROTO(const char *fmt, va_list args),
+ TP_ARGS(fmt, args));
+DECLARE_TRACE(kernel_kernel_kexec,
+ TP_PROTO(struct kimage *image),
+ TP_ARGS(image));
+DECLARE_TRACE(kernel_crash_kexec,
+ TP_PROTO(struct kimage *image, struct pt_regs *regs),
+ TP_ARGS(image, regs));
+
+#endif
diff --git a/include/trace/lockdep.h b/include/trace/lockdep.h
new file mode 100644
index 00000000000..dbd4629d0f8
--- /dev/null
+++ b/include/trace/lockdep.h
@@ -0,0 +1,37 @@
+#ifndef _LTTNG_TRACE_LOCKDEP_H
+#define _LTTNG_TRACE_LOCKDEP_H
+
+#include <linux/lockdep.h>
+#include <linux/tracepoint.h>
+
+/*
+ * lockdep tracing must be very careful with respect to reentrancy.
+ *
+ * It should not use immediate values for activation because it involves
+ * traps called when the code patching is done.
+ */
+DECLARE_TRACE(lockdep_hardirqs_on,
+ TP_PROTO(unsigned long retaddr),
+ TP_ARGS(retaddr));
+DECLARE_TRACE(lockdep_hardirqs_off,
+ TP_PROTO(unsigned long retaddr),
+ TP_ARGS(retaddr));
+DECLARE_TRACE(lockdep_softirqs_on,
+ TP_PROTO(unsigned long retaddr),
+ TP_ARGS(retaddr));
+DECLARE_TRACE(lockdep_softirqs_off,
+ TP_PROTO(unsigned long retaddr),
+ TP_ARGS(retaddr));
+
+/* FIXME: some duplication with lockdep TRACE_EVENTs */
+DECLARE_TRACE(lockdep_lock_acquire,
+ TP_PROTO(unsigned long retaddr, unsigned int subclass,
+ struct lockdep_map *lock, int trylock, int read,
+ int hardirqs_off),
+ TP_ARGS(retaddr, subclass, lock, trylock, read, hardirqs_off));
+DECLARE_TRACE(lockdep_lock_release,
+ TP_PROTO(unsigned long retaddr, struct lockdep_map *lock, int nested),
+ TP_ARGS(retaddr, lock, nested));
+
+
+#endif /* _LTTNG_TRACE_LOCKDEP_H */
diff --git a/include/trace/net.h b/include/trace/net.h
new file mode 100644
index 00000000000..91a0f02d0bc
--- /dev/null
+++ b/include/trace/net.h
@@ -0,0 +1,40 @@
+#ifndef _TRACE_LTTNG_NET_H
+#define _TRACE_LTTNG_NET_H
+
+#include <linux/tracepoint.h>
+
+struct sk_buff;
+DECLARE_TRACE(lttng_net_dev_xmit,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+DECLARE_TRACE(lttng_net_dev_receive,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+DECLARE_TRACE(net_tcpv4_rcv,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+DECLARE_TRACE(net_udpv4_rcv,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+
+/*
+ * Note that the first two tracepoints are actually in __napi_schedule and
+ * net_rx_action respectively. The former is in __napi_schedule because it
+ * uses at-most-once logic and placing it in the calling routine
+ * (napi_schedule) would produce countless trace events that were effectively
+ * no-ops. napi_poll is implemented in net_rx_action, because that's where we
+ * do our polling on devices. The last tracepoint is in napi_complete, right
+ * where you would expect it to be.
+ */
+struct napi_struct;
+DECLARE_TRACE(net_napi_schedule,
+ TP_PROTO(struct napi_struct *n),
+ TP_ARGS(n));
+DECLARE_TRACE(net_napi_poll,
+ TP_PROTO(struct napi_struct *n),
+ TP_ARGS(n));
+DECLARE_TRACE(net_napi_complete,
+ TP_PROTO(struct napi_struct *n),
+ TP_ARGS(n));
+
+#endif
diff --git a/include/trace/page_alloc.h b/include/trace/page_alloc.h
new file mode 100644
index 00000000000..c30a389ea91
--- /dev/null
+++ b/include/trace/page_alloc.h
@@ -0,0 +1,16 @@
+#ifndef _TRACE_PAGE_ALLOC_H
+#define _TRACE_PAGE_ALLOC_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * page_alloc: page can be NULL.
+ */
+DECLARE_TRACE(page_alloc,
+ TP_PROTO(struct page *page, unsigned int order),
+ TP_ARGS(page, order));
+DECLARE_TRACE(page_free,
+ TP_PROTO(struct page *page, unsigned int order),
+ TP_ARGS(page, order));
+
+#endif
diff --git a/include/trace/pm.h b/include/trace/pm.h
new file mode 100644
index 00000000000..84078bbe1db
--- /dev/null
+++ b/include/trace/pm.h
@@ -0,0 +1,11 @@
+#ifndef _TRACE_PM_H
+#define _TRACE_PM_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE_NOARGS(pm_idle_entry);
+DECLARE_TRACE_NOARGS(pm_idle_exit);
+DECLARE_TRACE_NOARGS(pm_suspend_entry);
+DECLARE_TRACE_NOARGS(pm_suspend_exit);
+
+#endif
diff --git a/include/trace/rcu.h b/include/trace/rcu.h
new file mode 100644
index 00000000000..f551c2ca9a0
--- /dev/null
+++ b/include/trace/rcu.h
@@ -0,0 +1,43 @@
+#ifndef _TRACE_RCU_H
+#define _TRACE_RCU_H
+
+#include <linux/tracepoint.h>
+#include <linux/rcupdate.h>
+
+DECLARE_TRACE(rcu_classic_callback,
+ TP_PROTO(struct rcu_head *head),
+ TP_ARGS(head));
+
+DECLARE_TRACE(rcu_classic_call_rcu,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+DECLARE_TRACE(rcu_classic_call_rcu_bh,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+DECLARE_TRACE(rcu_preempt_callback,
+ TP_PROTO(struct rcu_head *head),
+ TP_ARGS(head));
+
+DECLARE_TRACE(rcu_preempt_call_rcu,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+DECLARE_TRACE(rcu_preempt_call_rcu_sched,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+DECLARE_TRACE(rcu_tree_callback,
+ TP_PROTO(struct rcu_head *head),
+ TP_ARGS(head));
+
+DECLARE_TRACE(rcu_tree_call_rcu,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+DECLARE_TRACE(rcu_tree_call_rcu_bh,
+ TP_PROTO(struct rcu_head *head, unsigned long ip),
+ TP_ARGS(head, ip));
+
+#endif
diff --git a/include/trace/sched.h b/include/trace/sched.h
new file mode 100644
index 00000000000..a4b0307c4d6
--- /dev/null
+++ b/include/trace/sched.h
@@ -0,0 +1,11 @@
+#ifndef _LTTNG_TRACE_SCHED_H
+#define _LTTNG_TRACE_SCHED_H
+
+#include <linux/sched.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(sched_kthread_create,
+ TP_PROTO(void *fn, int pid),
+ TP_ARGS(fn, pid));
+
+#endif /* _LTTNG_TRACE_SCHED_H */
diff --git a/include/trace/socket.h b/include/trace/socket.h
new file mode 100644
index 00000000000..4e8a324575d
--- /dev/null
+++ b/include/trace/socket.h
@@ -0,0 +1,77 @@
+#ifndef _TRACE_SOCKET_H
+#define _TRACE_SOCKET_H
+
+#include <net/sock.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(socket_create,
+ TP_PROTO(int family, int type, int protocol, struct socket *sock,
+ int ret),
+ TP_ARGS(family, type, protocol, sock, ret));
+
+DECLARE_TRACE(socket_bind,
+ TP_PROTO(int fd, struct sockaddr __user *umyaddr, int addrlen, int ret),
+ TP_ARGS(fd, umyaddr, addrlen, ret));
+
+DECLARE_TRACE(socket_connect,
+ TP_PROTO(int fd, struct sockaddr __user *uservaddr, int addrlen,
+ int ret),
+ TP_ARGS(fd, uservaddr, addrlen, ret));
+
+DECLARE_TRACE(socket_listen,
+ TP_PROTO(int fd, int backlog, int ret),
+ TP_ARGS(fd, backlog, ret));
+
+DECLARE_TRACE(socket_accept,
+ TP_PROTO(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags, int ret),
+ TP_ARGS(fd, upeer_sockaddr, upeer_addrlen, flags, ret));
+
+DECLARE_TRACE(socket_getsockname,
+ TP_PROTO(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len, int ret),
+ TP_ARGS(fd, usockaddr, usockaddr_len, ret));
+
+DECLARE_TRACE(socket_getpeername,
+ TP_PROTO(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len, int ret),
+ TP_ARGS(fd, usockaddr, usockaddr_len, ret));
+
+DECLARE_TRACE(socket_socketpair,
+ TP_PROTO(int family, int type, int protocol, int __user *usockvec,
+ int ret),
+ TP_ARGS(family, type, protocol, usockvec, ret));
+
+DECLARE_TRACE(socket_sendmsg,
+ TP_PROTO(struct socket *sock, struct msghdr *msg, size_t size, int ret),
+ TP_ARGS(sock, msg, size, ret));
+
+DECLARE_TRACE(socket_recvmsg,
+ TP_PROTO(struct socket *sock, struct msghdr *msg, size_t size,
+ int flags, int ret),
+ TP_ARGS(sock, msg, size, flags, ret));
+
+DECLARE_TRACE(socket_setsockopt,
+ TP_PROTO(int fd, int level, int optname, char __user *optval,
+ int optlen, int ret),
+ TP_ARGS(fd, level, optname, optval, optlen, ret));
+
+DECLARE_TRACE(socket_getsockopt,
+ TP_PROTO(int fd, int level, int optname, char __user *optval,
+ int __user *optlen, int ret),
+ TP_ARGS(fd, level, optname, optval, optlen, ret));
+
+DECLARE_TRACE(socket_shutdown,
+ TP_PROTO(int fd, int how, int ret),
+ TP_ARGS(fd, how, ret));
+
+/*
+ * socket_call
+ *
+ * We also trace socket_call so we can tell which syscall userspace used
+ * (socket_call or the direct sock_send... family).
+ */
+DECLARE_TRACE(socket_call,
+ TP_PROTO(int call, unsigned long a0),
+ TP_ARGS(call, a0));
+#endif
diff --git a/include/trace/swap.h b/include/trace/swap.h
new file mode 100644
index 00000000000..bd035a7204e
--- /dev/null
+++ b/include/trace/swap.h
@@ -0,0 +1,20 @@
+#ifndef _TRACE_SWAP_H
+#define _TRACE_SWAP_H
+
+#include <linux/swap.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(swap_in,
+ TP_PROTO(struct page *page, swp_entry_t entry),
+ TP_ARGS(page, entry));
+DECLARE_TRACE(swap_out,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page));
+DECLARE_TRACE(swap_file_open,
+ TP_PROTO(struct file *file, char *filename),
+ TP_ARGS(file, filename));
+DECLARE_TRACE(swap_file_close,
+ TP_PROTO(struct file *file),
+ TP_ARGS(file));
+
+#endif
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 31966a4fb8c..2f40e5e65a2 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -6,6 +6,7 @@
#include <linux/ftrace_event.h>
#include <asm/ptrace.h>
+#include <linux/tracepoint.h>
/*
@@ -54,4 +55,11 @@ int perf_sysexit_enable(struct ftrace_event_call *call);
void perf_sysexit_disable(struct ftrace_event_call *call);
#endif
+DECLARE_TRACE(syscall_entry,
+ TP_PROTO(struct pt_regs *regs, long id),
+ TP_ARGS(regs, id));
+DECLARE_TRACE(syscall_exit,
+ TP_PROTO(long ret),
+ TP_ARGS(ret));
+
#endif /* _TRACE_SYSCALL_H */
diff --git a/include/trace/timer.h b/include/trace/timer.h
new file mode 100644
index 00000000000..9b2a852ca21
--- /dev/null
+++ b/include/trace/timer.h
@@ -0,0 +1,24 @@
+#ifndef _TRACE_TIMER_H
+#define _TRACE_TIMER_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(timer_itimer_expired,
+ TP_PROTO(struct signal_struct *sig),
+ TP_ARGS(sig));
+DECLARE_TRACE(timer_itimer_set,
+ TP_PROTO(int which, struct itimerval *value),
+ TP_ARGS(which, value));
+DECLARE_TRACE(timer_set,
+ TP_PROTO(struct timer_list *timer),
+ TP_ARGS(timer));
+/*
+ * xtime_lock is taken when the timer_update_time tracepoint is reached.
+ */
+DECLARE_TRACE(timer_update_time,
+ TP_PROTO(struct timespec *_xtime, struct timespec *_wall_to_monotonic),
+ TP_ARGS(_xtime, _wall_to_monotonic));
+DECLARE_TRACE(timer_timeout,
+ TP_PROTO(struct task_struct *p),
+ TP_ARGS(p));
+#endif
diff --git a/include/trace/trap.h b/include/trace/trap.h
new file mode 100644
index 00000000000..1b70c049960
--- /dev/null
+++ b/include/trace/trap.h
@@ -0,0 +1,11 @@
+#ifndef _TRACE_TRAP_H
+#define _TRACE_TRAP_H
+
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(trap_entry,
+ TP_PROTO(struct pt_regs *regs, long id),
+ TP_ARGS(regs, id));
+DECLARE_TRACE_NOARGS(trap_exit);
+
+#endif
diff --git a/init/Kconfig b/init/Kconfig
index be788c0957d..64da1a37121 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -797,6 +797,41 @@ config NET_NS
endif # NAMESPACES
+#
+# Architectures with a 64-bit get_cycles() should select this.
+# They should also define
+# get_cycles_barrier() : instruction synchronization barrier if required
+# get_cycles_rate() : cycle counter rate, in HZ. If 0, the TSCs are not
+# synchronized across CPUs or their frequency may vary due to frequency
+# scaling.
+#
+config HAVE_GET_CYCLES
+ def_bool n
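+
+# An architecture opts in from its own Kconfig entry, e.g. (illustrative):
+#
+#	config MY_ARCH
+#		select HAVE_GET_CYCLES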
+
+#
+# Architectures with a specialized tracing clock should select this.
+#
+config HAVE_TRACE_CLOCK
+ def_bool n
+
+config HAVE_TRACE_CLOCK_GENERIC
+ bool
+ default y if (!HAVE_TRACE_CLOCK)
+ default n if HAVE_TRACE_CLOCK
+ select HAVE_TRACE_CLOCK_32_TO_64 if (!64BIT)
+
+#
+# Architectures with only a 32-bit clock source should select this.
+#
+config HAVE_TRACE_CLOCK_32_TO_64
+ def_bool n
+
+#
+# Architectures which need to dynamically detect if their TSC is unsynchronized
+# across CPUs should select this.
+#
+config HAVE_UNSYNCHRONIZED_TSC
+ def_bool n
+
config SCHED_AUTOGROUP
bool "Automatic process group scheduling"
select EVENTFD
@@ -1263,8 +1298,18 @@ config PROFILING
config TRACEPOINTS
bool
+config MARKERS
+ bool "Activate markers"
+ select TRACEPOINTS
+ help
+ Place an empty function call at each marker site. Can be
+ dynamically changed for a probe function.
+
source "arch/Kconfig"
+config HAVE_LTT_DUMP_TABLES
+ def_bool n
+
endmenu # General setup
config HAVE_GENERIC_DMA_COHERENT
diff --git a/ipc/msg.c b/ipc/msg.c
index 747b65507a9..6a0500c7b47 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -37,6 +37,7 @@
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/current.h>
#include <asm/uaccess.h>
@@ -71,6 +72,8 @@ struct msg_sender {
#define msg_unlock(msq) ipc_unlock(&(msq)->q_perm)
+DEFINE_TRACE(ipc_msg_create);
+
static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
static int newque(struct ipc_namespace *, struct ipc_params *);
#ifdef CONFIG_PROC_FS
@@ -314,6 +317,7 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
struct ipc_namespace *ns;
struct ipc_ops msg_ops;
struct ipc_params msg_params;
+ long ret;
ns = current->nsproxy->ipc_ns;
@@ -324,7 +328,9 @@ SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg)
msg_params.key = key;
msg_params.flg = msgflg;
- return ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+ ret = ipcget(ns, &msg_ids(ns), &msg_ops, &msg_params);
+ trace_ipc_msg_create(ret, msgflg);
+ return ret;
}
static inline unsigned long
diff --git a/ipc/sem.c b/ipc/sem.c
index 0e0d49bbb86..32026ae7aa1 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -86,6 +86,7 @@
#include <linux/rwsem.h>
#include <linux/nsproxy.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/uaccess.h>
#include "util.h"
@@ -118,6 +119,8 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define sc_semopm sem_ctls[2]
#define sc_semmni sem_ctls[3]
+DEFINE_TRACE(ipc_sem_create);
+
void sem_init_ns(struct ipc_namespace *ns)
{
ns->sc_semmsl = SEMMSL;
@@ -323,6 +326,7 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
struct ipc_namespace *ns;
struct ipc_ops sem_ops;
struct ipc_params sem_params;
+ long err;
ns = current->nsproxy->ipc_ns;
@@ -337,7 +341,9 @@ SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg)
sem_params.flg = semflg;
sem_params.u.nsems = nsems;
- return ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+ err = ipcget(ns, &sem_ids(ns), &sem_ops, &sem_params);
+ trace_ipc_sem_create(err, semflg);
+ return err;
}
/*
diff --git a/ipc/shm.c b/ipc/shm.c
index 7d3bb22a930..f9761bb2d11 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -39,6 +39,7 @@
#include <linux/nsproxy.h>
#include <linux/mount.h>
#include <linux/ipc_namespace.h>
+#include <trace/ipc.h>
#include <asm/uaccess.h>
@@ -56,6 +57,8 @@ struct shm_file_data {
static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;
+DEFINE_TRACE(ipc_shm_create);
+
#define shm_ids(ns) ((ns)->ids[IPC_SHM_IDS])
#define shm_unlock(shp) \
@@ -456,6 +459,7 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
struct ipc_namespace *ns;
struct ipc_ops shm_ops;
struct ipc_params shm_params;
+ long err;
ns = current->nsproxy->ipc_ns;
@@ -467,7 +471,9 @@ SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
shm_params.flg = shmflg;
shm_params.u.size = size;
- return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+ err = ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
+ trace_ipc_shm_create(err, shmflg);
+ return err;
}
static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
diff --git a/ipc/syscall.c b/ipc/syscall.c
index 1d6f53f6b56..daf35cd9e4e 100644
--- a/ipc/syscall.c
+++ b/ipc/syscall.c
@@ -12,6 +12,9 @@
#include <linux/shm.h>
#include <linux/syscalls.h>
#include <linux/uaccess.h>
+#include <trace/ipc.h>
+
+DEFINE_TRACE(ipc_call);
SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
unsigned long, third, void __user *, ptr, long, fifth)
@@ -21,6 +24,8 @@ SYSCALL_DEFINE6(ipc, unsigned int, call, int, first, unsigned long, second,
version = call >> 16; /* hack for backward compatibility */
call &= 0xffff;
+ trace_ipc_call(call, first);
+
switch (call) {
case SEMOP:
return sys_semtimedop(first, (struct sembuf __user *)ptr,
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe8ba3..c039580ba3b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -91,6 +91,7 @@ obj-$(CONFIG_RELAY) += relay.o
obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
+obj-$(CONFIG_MARKERS) += marker.o
obj-$(CONFIG_TRACEPOINTS) += tracepoint.o
obj-$(CONFIG_LATENCYTOP) += latencytop.o
obj-$(CONFIG_BINFMT_ELF) += elfcore.o
@@ -99,7 +100,10 @@ obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o
obj-$(CONFIG_FUNCTION_TRACER) += trace/
obj-$(CONFIG_TRACING) += trace/
obj-$(CONFIG_X86_DS) += trace/
+obj-$(CONFIG_MARKERS) += ltt-channels.o
obj-$(CONFIG_RING_BUFFER) += trace/
+obj-$(CONFIG_HAVE_TRACE_CLOCK_32_TO_64) += trace/
+obj-$(CONFIG_HAVE_TRACE_CLOCK_GENERIC) += trace/
obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
diff --git a/kernel/exit.c b/kernel/exit.c
index f9a45ebcc7b..0d9a3444614 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -514,6 +514,8 @@ struct files_struct *get_files_struct(struct task_struct *task)
return files;
}
+EXPORT_SYMBOL(get_files_struct);
+
void put_files_struct(struct files_struct *files)
{
struct fdtable *fdt;
@@ -535,6 +537,8 @@ void put_files_struct(struct files_struct *files)
}
}
+EXPORT_SYMBOL(put_files_struct);
+
void reset_files_struct(struct files_struct *files)
{
struct task_struct *tsk = current;
diff --git a/kernel/fork.c b/kernel/fork.c
index 25e429152dd..5bb0bb18434 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -88,6 +88,7 @@ int max_threads; /* tunable limit on nr_threads */
DEFINE_PER_CPU(unsigned long, process_counts) = 0;
__cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */
+EXPORT_SYMBOL(tasklist_lock);
#ifdef CONFIG_PROVE_RCU
int lockdep_tasklist_lock_is_held(void)
@@ -1250,6 +1251,15 @@ static struct task_struct *copy_process(unsigned long clone_flags,
/* Need tasklist lock for parent etc handling! */
write_lock_irq(&tasklist_lock);
+ /*
+ * The state of the parent's TIF_KERNEL_TRACE flag may have changed
+ * since it was copied in dup_task_struct(), so we re-copy it here.
+ */
+ if (test_thread_flag(TIF_KERNEL_TRACE))
+ set_tsk_thread_flag(p, TIF_KERNEL_TRACE);
+ else
+ clear_tsk_thread_flag(p, TIF_KERNEL_TRACE);
+
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
p->real_parent = current->real_parent;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 3540a719012..db864334a95 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -17,6 +17,7 @@
#include <linux/kernel_stat.h>
#include <trace/events/irq.h>
+#include <trace/irq.h>
#include "internals.h"
@@ -51,6 +52,9 @@ static void warn_no_thread(unsigned int irq, struct irqaction *action)
"but no thread function available.", irq, action->name);
}
+DEFINE_TRACE(irq_entry);
+DEFINE_TRACE(irq_exit);
+
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number
@@ -63,6 +67,8 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
+ trace_irq_entry(irq, NULL, action);
+
do {
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
@@ -116,5 +122,7 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
add_interrupt_randomness(irq);
local_irq_disable();
+ trace_irq_exit(retval);
+
return retval;
}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index 2039bea31bd..1c07afd307f 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -109,6 +109,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
{
return radix_tree_lookup(&irq_desc_tree, irq);
}
+EXPORT_SYMBOL_GPL(irq_to_desc);
static void delete_irq_desc(unsigned int irq)
{
@@ -273,6 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
+EXPORT_SYMBOL_GPL(irq_to_desc);
struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
{
diff --git a/kernel/itimer.c b/kernel/itimer.c
index d802883153d..18fd8e919c0 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -13,9 +13,13 @@
#include <linux/posix-timers.h>
#include <linux/hrtimer.h>
#include <trace/events/timer.h>
+#include <trace/timer.h>
#include <asm/uaccess.h>
+DEFINE_TRACE(timer_itimer_expired);
+DEFINE_TRACE(timer_itimer_set);
+
/**
* itimer_get_remtime - get remaining time for the timer
*
@@ -124,6 +128,7 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer)
container_of(timer, struct signal_struct, real_timer);
trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0);
+ trace_timer_itimer_expired(sig);
kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid);
return HRTIMER_NORESTART;
@@ -201,6 +206,8 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
!timeval_valid(&value->it_interval))
return -EINVAL;
+ trace_timer_itimer_set(which, value);
+
switch (which) {
case ITIMER_REAL:
again:
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec19b92c7eb..779f0031929 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -33,6 +33,7 @@
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/kmsg_dump.h>
+#include <trace/kernel.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -40,6 +41,9 @@
#include <asm/system.h>
#include <asm/sections.h>
+DEFINE_TRACE(kernel_kernel_kexec);
+DEFINE_TRACE(kernel_crash_kexec);
+
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -1066,6 +1070,8 @@ asmlinkage long compat_sys_kexec_load(unsigned long entry,
void crash_kexec(struct pt_regs *regs)
{
+ trace_kernel_crash_kexec(kexec_crash_image, regs);
+
/* Take the kexec_mutex here to prevent sys_kexec_load
* running on one cpu from replacing the crash kernel
* we are using after a panic on a different cpu.
@@ -1495,6 +1501,8 @@ int kernel_kexec(void)
{
int error = 0;
+ trace_kernel_kernel_kexec(kexec_image);
+
if (!mutex_trylock(&kexec_mutex))
return -EBUSY;
if (!kexec_image) {
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 0d2058da80f..e0841c537db 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -49,6 +49,8 @@
#include "lockdep_internals.h"
+#include <trace/lockdep.h>
+
#define CREATE_TRACE_POINTS
#include <trace/events/lock.h>
@@ -66,6 +68,13 @@ module_param(lock_stat, int, 0644);
#define lock_stat 0
#endif
+DEFINE_TRACE(lockdep_hardirqs_on);
+DEFINE_TRACE(lockdep_hardirqs_off);
+DEFINE_TRACE(lockdep_softirqs_on);
+DEFINE_TRACE(lockdep_softirqs_off);
+DEFINE_TRACE(lockdep_lock_acquire);
+DEFINE_TRACE(lockdep_lock_release);
+
/*
* lockdep_lock: protects the lockdep graph, the hashes and the
* class/list/hash allocators.
@@ -2300,6 +2309,8 @@ void trace_hardirqs_on_caller(unsigned long ip)
time_hardirqs_on(CALLER_ADDR0, ip);
+ trace_lockdep_hardirqs_on(ip);
+
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2358,6 +2369,8 @@ void trace_hardirqs_off_caller(unsigned long ip)
time_hardirqs_off(CALLER_ADDR0, ip);
+ trace_lockdep_hardirqs_off(ip);
+
if (unlikely(!debug_locks || current->lockdep_recursion))
return;
@@ -2390,6 +2403,8 @@ void trace_softirqs_on(unsigned long ip)
{
struct task_struct *curr = current;
+ trace_lockdep_softirqs_on(ip);
+
if (unlikely(!debug_locks))
return;
@@ -2424,6 +2439,8 @@ void trace_softirqs_off(unsigned long ip)
{
struct task_struct *curr = current;
+ trace_lockdep_softirqs_off(ip);
+
if (unlikely(!debug_locks))
return;
@@ -2730,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
int class_idx;
u64 chain_key;
+ trace_lockdep_lock_acquire(ip, subclass, lock, trylock, read,
+ hardirqs_off);
+
if (!prove_locking)
check = 1;
@@ -3108,6 +3128,8 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip)
{
struct task_struct *curr = current;
+ trace_lockdep_lock_release(ip, lock, nested);
+
if (!check_unlock(curr, lock, ip))
return;
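Each DEFINE_TRACE() above pairs with a DECLARE_TRACE() in trace/lockdep.h, which also generates static register_trace_*()/unregister_trace_*() wrappers. A sketch of hooking one event through those wrappers, assuming the usual generated signatures; the probe body is illustrative:

    #include <trace/lockdep.h>

    /* Argument list mirrors trace_lockdep_hardirqs_on(ip) above. */
    static void probe_hardirqs_on(void *data, unsigned long ip)
    {
	    /* Count or timestamp hardirq-enable events here. */
    }

    static int lockdep_trace_attach(void)
    {
	    return register_trace_lockdep_hardirqs_on(probe_hardirqs_on, NULL);
    }

    static void lockdep_trace_detach(void)
    {
	    unregister_trace_lockdep_hardirqs_on(probe_hardirqs_on, NULL);
    }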
diff --git a/kernel/ltt-channels.c b/kernel/ltt-channels.c
new file mode 100644
index 00000000000..102513874ad
--- /dev/null
+++ b/kernel/ltt-channels.c
@@ -0,0 +1,388 @@
+/*
+ * ltt/ltt-channels.c
+ *
+ * (C) Copyright 2008 - Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * LTTng channel management.
+ *
+ * Author:
+ * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Dual LGPL v2.1/GPL v2 license.
+ */
+
+#include <linux/module.h>
+#include <linux/ltt-channels.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/*
+ * ltt_channel_mutex may be nested inside the LTT trace mutex.
+ * ltt_channel_mutex may be nested inside the markers mutex.
+ */
+static DEFINE_MUTEX(ltt_channel_mutex);
+static LIST_HEAD(ltt_channels);
+/*
+ * Index of next channel in array. Makes sure that as long as a trace channel is
+ * allocated, no array index will be re-used when a channel is freed and then
+ * another channel is allocated. This index is cleared and the array indexes
+ * get reassigned when the index_kref goes back to 0, which indicates that no
+ * more trace channels are allocated.
+ */
+static unsigned int free_index;
+/* index_kref is protected by both ltt_channel_mutex and lock_markers */
+static struct kref index_kref; /* Keeps track of allocated trace channels */
+
+static struct ltt_channel_setting *lookup_channel(const char *name)
+{
+ struct ltt_channel_setting *iter;
+
+ list_for_each_entry(iter, &ltt_channels, list)
+ if (strcmp(name, iter->name) == 0)
+ return iter;
+ return NULL;
+}
+
+/*
+ * Must be called when channel refcount falls to 0 _and_ also when the last
+ * trace is freed. This function is responsible for compacting the channel and
+ * event IDs when no users are active.
+ *
+ * Called with lock_markers() and channels mutex held.
+ */
+static void release_channel_setting(struct kref *kref)
+{
+ struct ltt_channel_setting *setting = container_of(kref,
+ struct ltt_channel_setting, kref);
+ struct ltt_channel_setting *iter;
+
+ if (atomic_read(&index_kref.refcount) == 0
+ && atomic_read(&setting->kref.refcount) == 0) {
+ list_del(&setting->list);
+ kfree(setting);
+
+ free_index = 0;
+ list_for_each_entry(iter, &ltt_channels, list) {
+ iter->index = free_index++;
+ iter->free_event_id = 0;
+ }
+ }
+}
+
+/*
+ * Perform channel index compaction when the last trace channel is freed.
+ *
+ * Called with lock_markers() and channels mutex held.
+ */
+static void release_trace_channel(struct kref *kref)
+{
+ struct ltt_channel_setting *iter, *n;
+
+ list_for_each_entry_safe(iter, n, &ltt_channels, list)
+ release_channel_setting(&iter->kref);
+ if (atomic_read(&index_kref.refcount) == 0)
+ markers_compact_event_ids();
+}
+
+/*
+ * ltt_channels_trace_ref - Is there an existing trace session?
+ *
+ * Must be called with lock_markers() held.
+ */
+int ltt_channels_trace_ref(void)
+{
+ return !!atomic_read(&index_kref.refcount);
+}
+EXPORT_SYMBOL_GPL(ltt_channels_trace_ref);
+
+/**
+ * ltt_channels_register - Register a trace channel.
+ * @name: channel name
+ *
+ * Uses refcounting.
+ */
+int ltt_channels_register(const char *name)
+{
+ struct ltt_channel_setting *setting;
+ int ret = 0;
+
+ mutex_lock(&ltt_channel_mutex);
+ setting = lookup_channel(name);
+ if (setting) {
+ if (atomic_read(&setting->kref.refcount) == 0)
+ goto init_kref;
+ else {
+ kref_get(&setting->kref);
+ goto end;
+ }
+ }
+ setting = kzalloc(sizeof(*setting), GFP_KERNEL);
+ if (!setting) {
+ ret = -ENOMEM;
+ goto end;
+ }
+ list_add(&setting->list, &ltt_channels);
+ strncpy(setting->name, name, PATH_MAX-1);
+ setting->index = free_index++;
+init_kref:
+ kref_init(&setting->kref);
+end:
+ mutex_unlock(&ltt_channel_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_register);
+
+/**
+ * ltt_channels_unregister - Unregister a trace channel.
+ * @name: channel name
+ * @compacting: performing compaction
+ *
+ * Must be called with markers mutex held.
+ */
+int ltt_channels_unregister(const char *name, int compacting)
+{
+ struct ltt_channel_setting *setting;
+ int ret = 0;
+
+ if (!compacting)
+ mutex_lock(&ltt_channel_mutex);
+ setting = lookup_channel(name);
+ if (!setting || atomic_read(&setting->kref.refcount) == 0) {
+ ret = -ENOENT;
+ goto end;
+ }
+ kref_put(&setting->kref, release_channel_setting);
+ if (!compacting && atomic_read(&index_kref.refcount) == 0)
+ markers_compact_event_ids();
+end:
+ if (!compacting)
+ mutex_unlock(&ltt_channel_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_unregister);
+
+/**
+ * ltt_channels_set_default - Set channel default behavior.
+ * @name: default channel name
+ * @sb_size: size of the subbuffers
+ * @n_sb: number of subbuffers
+ */
+int ltt_channels_set_default(const char *name,
+ unsigned int sb_size,
+ unsigned int n_sb)
+{
+ struct ltt_channel_setting *setting;
+ int ret = 0;
+
+ mutex_lock(&ltt_channel_mutex);
+ setting = lookup_channel(name);
+ if (!setting || atomic_read(&setting->kref.refcount) == 0) {
+ ret = -ENOENT;
+ goto end;
+ }
+ setting->sb_size = sb_size;
+ setting->n_sb = n_sb;
+end:
+ mutex_unlock(&ltt_channel_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_set_default);
+
+/**
+ * ltt_channels_get_name_from_index - get channel name from channel index
+ * @index: channel index
+ *
+ * Looks up the channel name given its index. Done to keep the name
+ * information outside of each trace channel instance.
+ */
+const char *ltt_channels_get_name_from_index(unsigned int index)
+{
+ struct ltt_channel_setting *iter;
+
+ list_for_each_entry(iter, &ltt_channels, list)
+ if (iter->index == index && atomic_read(&iter->kref.refcount))
+ return iter->name;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_get_name_from_index);
+
+static struct ltt_channel_setting *
+ltt_channels_get_setting_from_name(const char *name)
+{
+ struct ltt_channel_setting *iter;
+
+ list_for_each_entry(iter, &ltt_channels, list)
+ if (!strcmp(iter->name, name)
+ && atomic_read(&iter->kref.refcount))
+ return iter;
+ return NULL;
+}
+
+/**
+ * ltt_channels_get_index_from_name - get channel index from channel name
+ * @name: channel name
+ *
+ * Looks up the channel index given its name. Done to keep the name
+ * information outside of each trace channel instance.
+ * Returns -1 if not found.
+ */
+int ltt_channels_get_index_from_name(const char *name)
+{
+ struct ltt_channel_setting *setting;
+
+ setting = ltt_channels_get_setting_from_name(name);
+ if (setting)
+ return setting->index;
+ else
+ return -1;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_get_index_from_name);
+
+/**
+ * ltt_channels_trace_alloc - Allocate channel structures for a trace
+ * @sb_size: subbuffer size. 0 uses default.
+ * @n_sb: number of subbuffers per per-cpu buffer. 0 uses default.
+ * @flags: Default channel flags
+ *
+ * Use the current channel list to allocate the channels for a trace.
+ * Called with trace lock held. Does not perform the trace buffer allocation,
+ * because we must let the user overwrite specific channel sizes.
+ */
+struct ltt_chan *ltt_channels_trace_alloc(unsigned int *nr_channels,
+ int overwrite, int active)
+{
+ struct ltt_chan *chan = NULL;
+ struct ltt_channel_setting *iter;
+
+ lock_markers();
+ mutex_lock(&ltt_channel_mutex);
+ if (!free_index)
+ goto end;
+ if (!atomic_read(&index_kref.refcount))
+ kref_init(&index_kref);
+ else
+ kref_get(&index_kref);
+ *nr_channels = free_index;
+ chan = kzalloc(sizeof(struct ltt_chan) * free_index, GFP_KERNEL);
+ if (!chan)
+ goto end;
+ list_for_each_entry(iter, &ltt_channels, list) {
+ if (!atomic_read(&iter->kref.refcount))
+ continue;
+ chan[iter->index].a.sb_size = iter->sb_size;
+ chan[iter->index].a.n_sb = iter->n_sb;
+ chan[iter->index].overwrite = overwrite;
+ chan[iter->index].active = active;
+ strncpy(chan[iter->index].a.filename, iter->name, NAME_MAX - 1);
+ chan[iter->index].switch_timer_interval = 0;
+ }
+end:
+ mutex_unlock(&ltt_channel_mutex);
+ unlock_markers();
+ return chan;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_trace_alloc);
+
+/**
+ * ltt_channels_trace_free - Free one trace's channels
+ * @channels: channels to free
+ *
+ * Called with trace lock held. The actual channel buffers must be freed before
+ * this function is called.
+ */
+void ltt_channels_trace_free(struct ltt_chan *channels,
+ unsigned int nr_channels)
+{
+ lock_markers();
+ mutex_lock(&ltt_channel_mutex);
+ kfree(channels);
+ kref_put(&index_kref, release_trace_channel);
+ mutex_unlock(&ltt_channel_mutex);
+ unlock_markers();
+ marker_update_probes();
+}
+EXPORT_SYMBOL_GPL(ltt_channels_trace_free);
+
+/**
+ * ltt_channels_trace_set_timer - set switch timer
+ * @chan: channel
+ * @interval: interval of timer interrupt, in jiffies. 0 inhibits timer.
+ */
+void ltt_channels_trace_set_timer(struct ltt_chan *chan,
+ unsigned long interval)
+{
+ chan->switch_timer_interval = interval;
+}
+EXPORT_SYMBOL_GPL(ltt_channels_trace_set_timer);
+
+/**
+ * _ltt_channels_get_event_id - get next event ID for a marker
+ * @channel: channel name
+ * @name: event name
+ *
+ * Returns a unique event ID (for this channel) or < 0 on error.
+ * Must be called with channels mutex held.
+ */
+int _ltt_channels_get_event_id(const char *channel, const char *name)
+{
+ struct ltt_channel_setting *setting;
+ int ret;
+
+ setting = ltt_channels_get_setting_from_name(channel);
+ if (!setting) {
+ ret = -ENOENT;
+ goto end;
+ }
+ if (strcmp(channel, "metadata") == 0) {
+ if (strcmp(name, "core_marker_id") == 0)
+ ret = 0;
+ else if (strcmp(name, "core_marker_format") == 0)
+ ret = 1;
+ else
+ ret = -ENOENT;
+ goto end;
+ }
+ if (setting->free_event_id == EVENTS_PER_CHANNEL - 1) {
+ ret = -ENOSPC;
+ goto end;
+ }
+ ret = setting->free_event_id++;
+end:
+ return ret;
+}
+
+/**
+ * ltt_channels_get_event_id - get next event ID for a marker
+ * @channel: channel name
+ * @name: event name
+ *
+ * Returns a unique event ID (for this channel) or < 0 on error.
+ */
+int ltt_channels_get_event_id(const char *channel, const char *name)
+{
+ int ret;
+
+ mutex_lock(&ltt_channel_mutex);
+ ret = _ltt_channels_get_event_id(channel, name);
+ mutex_unlock(&ltt_channel_mutex);
+ return ret;
+}
+
+/**
+ * _ltt_channels_reset_event_ids - reset event IDs at compaction
+ *
+ * Called with lock_markers() and the channel mutex held.
+ */
+void _ltt_channels_reset_event_ids(void)
+{
+ struct ltt_channel_setting *iter;
+
+ list_for_each_entry(iter, &ltt_channels, list)
+ iter->free_event_id = 0;
+}
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Linux Trace Toolkit Next Generation Channel Management");
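Taken together, the API above is used in a fixed order by a tracer: register the channel (refcounted), optionally tune its buffer geometry, allocate the per-trace channel array, and tear everything down in reverse. A minimal sketch with error paths trimmed; the channel name and sizes are illustrative, and the locking requirements stated in the kernel-doc above still apply:

    #include <linux/errno.h>
    #include <linux/ltt-channels.h>

    static struct ltt_chan *chans;
    static unsigned int nr_chans;

    static int example_trace_setup(void)
    {
	    int ret;

	    ret = ltt_channels_register("example");	/* refcounted */
	    if (ret)
		    return ret;
	    /* 16 subbuffers of 4 kiB each for this channel. */
	    ret = ltt_channels_set_default("example", 4096, 16);
	    if (ret)
		    return ret;
	    /* Non-overwrite mode, channel active; trace lock held. */
	    chans = ltt_channels_trace_alloc(&nr_chans, 0, 1);
	    return chans ? 0 : -ENOMEM;
    }

    static void example_trace_teardown(void)
    {
	    /* Channel buffers must already be freed; trace lock held. */
	    ltt_channels_trace_free(chans, nr_chans);
	    /* Called with the markers mutex held, per the kernel-doc. */
	    ltt_channels_unregister("example", 0);
    }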
diff --git a/kernel/marker.c b/kernel/marker.c
new file mode 100644
index 00000000000..eac8ebfc3b9
--- /dev/null
+++ b/kernel/marker.c
@@ -0,0 +1,1262 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/jhash.h>
+#include <linux/hash.h>
+#include <linux/list.h>
+#include <linux/rcupdate.h>
+#include <linux/marker.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/immediate.h>
+#include <linux/ltt-channels.h>
+
+extern struct marker __start___markers[];
+extern struct marker __stop___markers[];
+
+/* Set to 1 to enable marker debug output */
+static const int marker_debug;
+
+/*
+ * markers_mutex nests inside module_mutex. Markers mutex protects the builtin
+ * and module markers and the hash table.
+ * markers_mutex nests inside the trace lock, to ensure event ID consistency
+ * between the hash table and the marker section.
+ */
+static DEFINE_MUTEX(markers_mutex);
+
+void lock_markers(void)
+{
+ mutex_lock(&markers_mutex);
+}
+EXPORT_SYMBOL_GPL(lock_markers);
+
+void unlock_markers(void)
+{
+ mutex_unlock(&markers_mutex);
+}
+EXPORT_SYMBOL_GPL(unlock_markers);
+
+/*
+ * Marker hash table, containing the active markers.
+ * Protected by module_mutex.
+ */
+#define MARKER_HASH_BITS 6
+#define MARKER_TABLE_SIZE (1 << MARKER_HASH_BITS)
+static struct hlist_head marker_table[MARKER_TABLE_SIZE];
+static struct hlist_head id_table[MARKER_TABLE_SIZE];
+
+struct marker_probe_array {
+ struct rcu_head rcu;
+ struct marker_probe_closure c[0];
+};
+
+/*
+ * Note about RCU:
+ * It is used to make sure every handler has finished using its private data
+ * between two consecutive operations (add or remove) on a given marker. It is
+ * also used to delay the free of the multiple-probes array until a quiescent
+ * state is reached.
+ * Marker entry modifications are protected by the markers_mutex.
+ */
+struct marker_entry {
+ struct hlist_node hlist;
+ struct hlist_node id_list;
+ char *format;
+ char *name;
+ /* Probe wrapper */
+ void (*call)(const struct marker *mdata, void *call_private, ...);
+ struct marker_probe_closure single;
+ struct marker_probe_array *multi;
+ int refcount; /* Number of times armed. 0 if disarmed. */
+ u16 channel_id;
+ u16 event_id;
+ unsigned char ptype:1;
+ unsigned char format_allocated:1;
+ char channel[0]; /* Contains channel'\0'name'\0'format'\0' */
+};
+
+/**
+ * __mark_empty_function - Empty probe callback
+ * @mdata: marker data
+ * @probe_private: probe private data
+ * @call_private: call site private data
+ * @fmt: format string
+ * @...: variable argument list
+ *
+ * Empty callback provided as a probe to the markers. By providing this to a
+ * disabled marker, we make sure the execution flow is always valid even
+ * though the function pointer change and the marker enabling are two distinct
+ * operations that modify the execution flow of preemptible code.
+ */
+notrace void __mark_empty_function(const struct marker *mdata,
+ void *probe_private, void *call_private, const char *fmt, va_list *args)
+{
+}
+EXPORT_SYMBOL_GPL(__mark_empty_function);
+
+/*
+ * marker_probe_cb - Callback that prepares the variable argument list for probes.
+ * @mdata: pointer of type struct marker
+ * @call_private: caller site private data
+ * @...: Variable argument list.
+ *
+ * Since we do not use "typical" pointer based RCU in the 1 argument case, we
+ * need to put a full smp_rmb() in this branch. This is why we do not use
+ * rcu_dereference() for the pointer read.
+ */
+notrace void marker_probe_cb(const struct marker *mdata,
+ void *call_private, ...)
+{
+ va_list args;
+ char ptype;
+
+ /*
+	 * rcu_read_lock_sched does two things: it disables preemption to make
+	 * sure the teardown of the callbacks can be done correctly when they
+	 * are in modules, and it ensures RCU read coherency.
+ */
+ rcu_read_lock_sched_notrace();
+ ptype = mdata->ptype;
+ if (likely(!ptype)) {
+ marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependent,
+ * so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func = mdata->single.func;
+ /* Must read the ptr before private data. They are not data
+		 * dependent, so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ va_start(args, call_private);
+ func(mdata, mdata->single.probe_private, call_private,
+ mdata->format, &args);
+ va_end(args);
+ } else {
+ struct marker_probe_array *multi;
+ int i;
+ /*
+ * Read mdata->ptype before mdata->multi.
+ */
+ smp_rmb();
+ multi = mdata->multi;
+ /*
+ * multi points to an array, therefore accessing the array
+ * depends on reading multi. However, even in this case,
+		 * we must ensure that the pointer is read _before_ the array
+ * data. Same as rcu_dereference, but we need a full smp_rmb()
+ * in the fast path, so put the explicit barrier here.
+ */
+ smp_read_barrier_depends();
+ for (i = 0; multi->c[i].func; i++) {
+ va_start(args, call_private);
+ multi->c[i].func(mdata, multi->c[i].probe_private,
+ call_private, mdata->format, &args);
+ va_end(args);
+ }
+ }
+ rcu_read_unlock_sched_notrace();
+}
+EXPORT_SYMBOL_GPL(marker_probe_cb);
+
+/*
+ * marker_probe_cb_noarg - Callback that does not prepare the variable argument list.
+ * @mdata: pointer of type struct marker
+ * @call_private: caller site private data
+ * @...: Variable argument list.
+ *
+ * Should be connected to markers "MARK_NOARGS".
+ */
+static notrace void marker_probe_cb_noarg(const struct marker *mdata,
+ void *call_private, ...)
+{
+ va_list args; /* not initialized */
+ char ptype;
+
+ rcu_read_lock_sched_notrace();
+ ptype = mdata->ptype;
+ if (likely(!ptype)) {
+ marker_probe_func *func;
+		/* Must read the ptype before ptr. They are not data dependent,
+ * so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func = mdata->single.func;
+ /* Must read the ptr before private data. They are not data
+		 * dependent, so we put an explicit smp_rmb() here. */
+ smp_rmb();
+ func(mdata, mdata->single.probe_private, call_private,
+ mdata->format, &args);
+ } else {
+ struct marker_probe_array *multi;
+ int i;
+ /*
+ * Read mdata->ptype before mdata->multi.
+ */
+ smp_rmb();
+ multi = mdata->multi;
+ /*
+ * multi points to an array, therefore accessing the array
+ * depends on reading multi. However, even in this case,
+		 * we must ensure that the pointer is read _before_ the array
+ * data. Same as rcu_dereference, but we need a full smp_rmb()
+ * in the fast path, so put the explicit barrier here.
+ */
+ smp_read_barrier_depends();
+ for (i = 0; multi->c[i].func; i++)
+ multi->c[i].func(mdata, multi->c[i].probe_private,
+ call_private, mdata->format, &args);
+ }
+ rcu_read_unlock_sched_notrace();
+}
+
+static void free_old_closure(struct rcu_head *head)
+{
+ struct marker_probe_array *multi = container_of(head, struct marker_probe_array, rcu);
+ kfree(multi);
+}
+
+static void debug_print_probes(struct marker_entry *entry)
+{
+ int i;
+
+ if (!marker_debug)
+ return;
+
+ if (!entry->ptype) {
+ printk(KERN_DEBUG "Single probe : %p %p\n",
+ entry->single.func,
+ entry->single.probe_private);
+ } else {
+ for (i = 0; entry->multi->c[i].func; i++)
+ printk(KERN_DEBUG "Multi probe %d : %p %p\n", i,
+ entry->multi->c[i].func,
+ entry->multi->c[i].probe_private);
+ }
+}
+
+static struct marker_probe_array *
+marker_entry_add_probe(struct marker_entry *entry,
+ marker_probe_func *probe, void *probe_private)
+{
+ int nr_probes = 0;
+ struct marker_probe_array *old, *new;
+
+ WARN_ON(!probe);
+
+ debug_print_probes(entry);
+ old = entry->multi;
+ if (!entry->ptype) {
+ if (entry->single.func == probe &&
+ entry->single.probe_private == probe_private)
+ return ERR_PTR(-EBUSY);
+ if (entry->single.func == __mark_empty_function) {
+ /* 0 -> 1 probes */
+ entry->single.func = probe;
+ entry->single.probe_private = probe_private;
+ entry->refcount = 1;
+ entry->ptype = 0;
+ debug_print_probes(entry);
+ return NULL;
+ } else {
+ /* 1 -> 2 probes */
+ nr_probes = 1;
+ old = NULL;
+ }
+ } else {
+ /* (N -> N+1), (N != 0, 1) probes */
+ for (nr_probes = 0; old->c[nr_probes].func; nr_probes++)
+ if (old->c[nr_probes].func == probe
+ && old->c[nr_probes].probe_private
+ == probe_private)
+ return ERR_PTR(-EBUSY);
+ }
+ /* + 2 : one for new probe, one for NULL func */
+ new = kzalloc(sizeof(struct marker_probe_array)
+ + ((nr_probes + 2) * sizeof(struct marker_probe_closure)),
+ GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ if (!old)
+ new->c[0] = entry->single;
+ else
+ memcpy(&new->c[0], &old->c[0],
+ nr_probes * sizeof(struct marker_probe_closure));
+ new->c[nr_probes].func = probe;
+ new->c[nr_probes].probe_private = probe_private;
+ entry->refcount = nr_probes + 1;
+ entry->multi = new;
+ entry->ptype = 1;
+ debug_print_probes(entry);
+ return old;
+}
+
+static struct marker_probe_array *
+marker_entry_remove_probe(struct marker_entry *entry,
+ marker_probe_func *probe, void *probe_private)
+{
+ int nr_probes = 0, nr_del = 0, i;
+ struct marker_probe_array *old, *new;
+
+ old = entry->multi;
+
+ debug_print_probes(entry);
+ if (!entry->ptype) {
+ /* 0 -> N is an error */
+ WARN_ON(entry->single.func == __mark_empty_function);
+ /* 1 -> 0 probes */
+ WARN_ON(probe && entry->single.func != probe);
+ WARN_ON(entry->single.probe_private != probe_private);
+ entry->single.func = __mark_empty_function;
+ entry->refcount = 0;
+ entry->ptype = 0;
+ debug_print_probes(entry);
+ return NULL;
+ } else {
+ /* (N -> M), (N > 1, M >= 0) probes */
+ for (nr_probes = 0; old->c[nr_probes].func; nr_probes++) {
+ if ((!probe || old->c[nr_probes].func == probe)
+ && old->c[nr_probes].probe_private
+ == probe_private)
+ nr_del++;
+ }
+ }
+
+ if (nr_probes - nr_del == 0) {
+ /* N -> 0, (N > 1) */
+ entry->single.func = __mark_empty_function;
+ entry->refcount = 0;
+ entry->ptype = 0;
+ } else if (nr_probes - nr_del == 1) {
+ /* N -> 1, (N > 1) */
+ for (i = 0; old->c[i].func; i++)
+ if ((probe && old->c[i].func != probe) ||
+ old->c[i].probe_private != probe_private)
+ entry->single = old->c[i];
+ entry->refcount = 1;
+ entry->ptype = 0;
+ } else {
+ int j = 0;
+ /* N -> M, (N > 1, M > 1) */
+ /* + 1 for NULL */
+ new = kzalloc(sizeof(struct marker_probe_array)
+ + ((nr_probes - nr_del + 1)
+ * sizeof(struct marker_probe_closure)),
+ GFP_KERNEL);
+ if (new == NULL)
+ return ERR_PTR(-ENOMEM);
+ for (i = 0; old->c[i].func; i++)
+ if ((probe && old->c[i].func != probe) ||
+ old->c[i].probe_private != probe_private)
+ new->c[j++] = old->c[i];
+ entry->refcount = nr_probes - nr_del;
+ entry->ptype = 1;
+ entry->multi = new;
+ }
+ debug_print_probes(entry);
+ return old;
+}
+
+/*
+ * Get marker if the marker is present in the marker hash table.
+ * Must be called with markers_mutex held.
+ * Returns NULL if not present.
+ */
+static struct marker_entry *get_marker(const char *channel, const char *name)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct marker_entry *e;
+ size_t channel_len = strlen(channel) + 1;
+ size_t name_len = strlen(name) + 1;
+ u32 hash;
+
+ hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(channel, e->channel) && !strcmp(name, e->name))
+ return e;
+ }
+ return NULL;
+}
+
+/*
+ * Add the marker to the marker hash table. Must be called with markers_mutex
+ * held.
+ */
+static struct marker_entry *add_marker(const char *channel, const char *name,
+ const char *format)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct marker_entry *e;
+ size_t channel_len = strlen(channel) + 1;
+ size_t name_len = strlen(name) + 1;
+ size_t format_len = 0;
+ u32 hash;
+
+ hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
+ if (format)
+ format_len = strlen(format) + 1;
+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
+ printk(KERN_NOTICE
+ "Marker %s.%s busy\n", channel, name);
+ return ERR_PTR(-EBUSY); /* Already there */
+ }
+ }
+ /*
+ * Using kmalloc here to allocate a variable length element. Could
+ * cause some memory fragmentation if overused.
+ */
+ e = kmalloc(sizeof(struct marker_entry)
+ + channel_len + name_len + format_len,
+ GFP_KERNEL);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+ memcpy(e->channel, channel, channel_len);
+ e->name = &e->channel[channel_len];
+ memcpy(e->name, name, name_len);
+ if (format) {
+ e->format = &e->name[name_len];
+ memcpy(e->format, format, format_len);
+ if (strcmp(e->format, MARK_NOARGS) == 0)
+ e->call = marker_probe_cb_noarg;
+ else
+ e->call = marker_probe_cb;
+ trace_mark(metadata, core_marker_format,
+ "channel %s name %s format %s",
+ e->channel, e->name, e->format);
+ } else {
+ e->format = NULL;
+ e->call = marker_probe_cb;
+ }
+ e->single.func = __mark_empty_function;
+ e->single.probe_private = NULL;
+ e->multi = NULL;
+ e->ptype = 0;
+ e->format_allocated = 0;
+ e->refcount = 0;
+ hlist_add_head(&e->hlist, head);
+ return e;
+}
+
+/*
+ * Remove the marker from the marker hash table. Must be called with
+ * markers_mutex held. Parameter "registered" indicates if the channel
+ * registration has been performed.
+ */
+static int remove_marker(const char *channel, const char *name, int registered,
+ int compacting)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct marker_entry *e;
+ int found = 0;
+ size_t channel_len = strlen(channel) + 1;
+ size_t name_len = strlen(name) + 1;
+ u32 hash;
+ int ret;
+
+ hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
+ found = 1;
+ break;
+ }
+ }
+ if (!found)
+ return -ENOENT;
+ if (e->single.func != __mark_empty_function)
+ return -EBUSY;
+
+ if (registered && ltt_channels_trace_ref())
+ return 0;
+
+ hlist_del(&e->hlist);
+ hlist_del(&e->id_list);
+ if (registered) {
+ ret = ltt_channels_unregister(e->channel, compacting);
+ WARN_ON(ret);
+ }
+ if (e->format_allocated)
+ kfree(e->format);
+ kfree(e);
+ return 0;
+}
+
+/*
+ * Set the marker_entry format to the format found in the element.
+ */
+static int marker_set_format(struct marker_entry *entry, const char *format)
+{
+ entry->format = kstrdup(format, GFP_KERNEL);
+ if (!entry->format)
+ return -ENOMEM;
+ entry->format_allocated = 1;
+
+ trace_mark(metadata, core_marker_format,
+ "channel %s name %s format %s",
+ entry->channel, entry->name, entry->format);
+ return 0;
+}
+
+/*
+ * Sets the probe callback corresponding to one marker.
+ */
+static int set_marker(struct marker_entry *entry, struct marker *elem,
+ int active)
+{
+ int ret = 0;
+ WARN_ON(strcmp(entry->name, elem->name) != 0);
+
+ if (entry->format) {
+ if (strcmp(entry->format, elem->format) != 0) {
+ printk(KERN_NOTICE
+ "Format mismatch for probe %s "
+ "(%s), marker (%s)\n",
+ entry->name,
+ entry->format,
+ elem->format);
+ return -EPERM;
+ }
+ } else {
+ ret = marker_set_format(entry, elem->format);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * probe_cb setup (statically known) is done here. It is
+ * asynchronous with the rest of execution, therefore we only
+ * pass from a "safe" callback (with argument) to an "unsafe"
+ * callback (does not set arguments).
+ */
+ elem->call = entry->call;
+ elem->channel_id = entry->channel_id;
+ elem->event_id = entry->event_id;
+ /*
+ * Sanity check :
+ * We only update the single probe private data when the ptr is
+ * set to a _non_ single probe! (0 -> 1 and N -> 1, N != 1)
+ */
+ WARN_ON(elem->single.func != __mark_empty_function
+ && elem->single.probe_private != entry->single.probe_private
+ && !elem->ptype);
+ elem->single.probe_private = entry->single.probe_private;
+ /*
+ * Make sure the private data is valid when we update the
+ * single probe ptr.
+ */
+ smp_wmb();
+ elem->single.func = entry->single.func;
+ /*
+ * We also make sure that the new probe callbacks array is consistent
+ * before setting a pointer to it.
+ */
+ rcu_assign_pointer(elem->multi, entry->multi);
+ /*
+ * Update the function or multi probe array pointer before setting the
+ * ptype.
+ */
+ smp_wmb();
+ elem->ptype = entry->ptype;
+
+ if (elem->tp_name && (active ^ _imv_read(elem->state))) {
+ WARN_ON(!elem->tp_cb);
+ /*
+ * It is ok to directly call the probe registration because type
+ * checking has been done in the __trace_mark_tp() macro.
+ */
+
+ if (active) {
+ /*
+ * try_module_get should always succeed because we hold
+ * lock_module() to get the tp_cb address.
+ */
+ ret = try_module_get(__module_text_address(
+ (unsigned long)elem->tp_cb));
+ BUG_ON(!ret);
+ ret = tracepoint_probe_register_noupdate(
+ elem->tp_name,
+ elem->tp_cb, NULL);
+ } else {
+ ret = tracepoint_probe_unregister_noupdate(
+ elem->tp_name,
+ elem->tp_cb, NULL);
+ /*
+ * tracepoint_probe_update_all() must be called
+ * before the module containing tp_cb is unloaded.
+ */
+ module_put(__module_text_address(
+ (unsigned long)elem->tp_cb));
+ }
+ }
+ elem->state__imv = active;
+
+ return ret;
+}
+
+/*
+ * Disable a marker and its probe callback.
+ * Note: only waiting an RCU period after setting elem->call to the empty
+ * function ensures that the original callback is not used anymore. This is
+ * ensured by rcu_read_lock_sched around the call site.
+ */
+static void disable_marker(struct marker *elem)
+{
+ int ret;
+
+ /* leave "call" as is. It is known statically. */
+ if (elem->tp_name && _imv_read(elem->state)) {
+ WARN_ON(!elem->tp_cb);
+ /*
+ * It is ok to directly call the probe registration because type
+ * checking has been done in the __trace_mark_tp() macro.
+ */
+ ret = tracepoint_probe_unregister_noupdate(elem->tp_name,
+ elem->tp_cb, NULL);
+ WARN_ON(ret);
+ /*
+ * tracepoint_probe_update_all() must be called
+ * before the module containing tp_cb is unloaded.
+ */
+ module_put(__module_text_address((unsigned long)elem->tp_cb));
+ }
+ elem->state__imv = 0;
+ elem->single.func = __mark_empty_function;
+ /* Update the function before setting the ptype */
+ smp_wmb();
+ elem->ptype = 0; /* single probe */
+ /*
+ * Leave the private data and channel_id/event_id there, because removal
+ * is racy and should be done only after an RCU period. These are never
+ * used until the next initialization anyway.
+ */
+}
+
+/*
+ * is_marker_present - Check if a marker is present in the kernel.
+ * @channel: channel name
+ * @name: marker name
+ *
+ * We cannot take the marker lock around calls to this function because it needs
+ * to take the module mutex within the iterator. Marker mutex nests inside
+ * module mutex.
+ * Returns 1 if the marker is present, 0 if not.
+ */
+int is_marker_present(const char *channel, const char *name)
+{
+ int ret;
+ struct marker_iter iter;
+
+ ret = 0;
+
+ marker_iter_reset(&iter);
+ marker_iter_start(&iter);
+ for (; iter.marker != NULL; marker_iter_next(&iter)) {
+ if (!strcmp(iter.marker->channel, channel) &&
+ !strcmp(iter.marker->name, name)) {
+ ret = 1;
+ goto end;
+ }
+ }
+end:
+ marker_iter_stop(&iter);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(is_marker_present);
+
+/*
+ * _is_marker_enabled - Check if a marker is enabled, must be called with
+ * markers_mutex held.
+ * @channel: channel name
+ * @name: marker name
+ *
+ * Returns 1 if the marker is enabled, 0 if disabled.
+ */
+int _is_marker_enabled(const char *channel, const char *name)
+{
+ struct marker_entry *entry;
+
+ entry = get_marker(channel, name);
+
+ return entry && !!entry->refcount;
+}
+EXPORT_SYMBOL_GPL(_is_marker_enabled);
+
+/*
+ * is_marker_enabled - wrapper around _is_marker_enabled taking the markers mutex
+ * @channel: channel name
+ * @name: marker name
+ *
+ * Returns 1 if the marker is enabled, 0 if disabled.
+ */
+int is_marker_enabled(const char *channel, const char *name)
+{
+ int ret;
+
+ lock_markers();
+ ret = _is_marker_enabled(channel, name);
+ unlock_markers();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(is_marker_enabled);
+
+/**
+ * marker_update_probe_range - Update a probe range
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Updates the probe callback corresponding to a range of markers.
+ */
+void marker_update_probe_range(struct marker *begin,
+ struct marker *end)
+{
+ struct marker *iter;
+ struct marker_entry *mark_entry;
+
+ mutex_lock(&markers_mutex);
+ for (iter = begin; iter < end; iter++) {
+ mark_entry = get_marker(iter->channel, iter->name);
+ if (mark_entry) {
+ set_marker(mark_entry, iter, !!mark_entry->refcount);
+ /*
+ * ignore error, continue
+ */
+ } else {
+ disable_marker(iter);
+ }
+ }
+ mutex_unlock(&markers_mutex);
+}
+
+/*
+ * Update probes, removing the faulty probes.
+ *
+ * Internal callback only changed before the first probe is connected to it.
+ * Single probe private data can only be changed on 0 -> 1 and 2 -> 1
+ * transitions. All other transitions will leave the old private data valid.
+ * This makes the non-atomicity of the callback/private data updates valid.
+ *
+ * "special case" updates :
+ * 0 -> 1 callback
+ * 1 -> 0 callback
+ * 1 -> 2 callbacks
+ * 2 -> 1 callbacks
+ * Other updates all behave the same, just like the 2 -> 3 or 3 -> 2 updates.
+ * Side effect: marker_set_format may delete the marker entry (creating a
+ * replacement).
+ */
+void marker_update_probes(void)
+{
+ /* Core kernel markers */
+ marker_update_probe_range(__start___markers, __stop___markers);
+ /* Markers in modules. */
+ module_update_markers();
+ tracepoint_probe_update_all();
+ /* Update immediate values */
+ core_imv_update();
+ module_imv_update();
+}
+
+/**
+ * marker_probe_register - Connect a probe to a marker
+ * @channel: marker channel
+ * @name: marker name
+ * @format: format string
+ * @probe: probe handler
+ * @probe_private: probe private data
+ *
+ * The private data must be a valid allocated memory address, or NULL.
+ * Returns 0 if ok, error value on error.
+ * The probe address must at least be aligned on the architecture pointer size.
+ */
+int marker_probe_register(const char *channel, const char *name,
+ const char *format, marker_probe_func *probe,
+ void *probe_private)
+{
+ struct marker_entry *entry;
+ int ret = 0, ret_err;
+ struct marker_probe_array *old;
+ int first_probe = 0;
+
+ mutex_lock(&markers_mutex);
+ entry = get_marker(channel, name);
+ if (!entry) {
+ first_probe = 1;
+ entry = add_marker(channel, name, format);
+ if (IS_ERR(entry))
+ ret = PTR_ERR(entry);
+ if (ret)
+ goto end;
+ ret = ltt_channels_register(channel);
+ if (ret)
+ goto error_remove_marker;
+ ret = ltt_channels_get_index_from_name(channel);
+ if (ret < 0)
+ goto error_unregister_channel;
+ entry->channel_id = ret;
+ ret = ltt_channels_get_event_id(channel, name);
+ if (ret < 0)
+ goto error_unregister_channel;
+ entry->event_id = ret;
+ hlist_add_head(&entry->id_list, id_table + hash_32(
+ (entry->channel_id << 16) | entry->event_id,
+ MARKER_HASH_BITS));
+ ret = 0;
+ trace_mark(metadata, core_marker_id,
+ "channel %s name %s event_id %hu "
+ "int #1u%zu long #1u%zu pointer #1u%zu "
+ "size_t #1u%zu alignment #1u%u",
+ channel, name, entry->event_id,
+ sizeof(int), sizeof(long), sizeof(void *),
+ sizeof(size_t), ltt_get_alignment());
+ } else if (format) {
+ if (!entry->format)
+ ret = marker_set_format(entry, format);
+ else if (strcmp(entry->format, format))
+ ret = -EPERM;
+ if (ret)
+ goto end;
+ }
+
+ old = marker_entry_add_probe(entry, probe, probe_private);
+ if (IS_ERR(old)) {
+ ret = PTR_ERR(old);
+ if (first_probe)
+ goto error_unregister_channel;
+ else
+ goto end;
+ }
+ mutex_unlock(&markers_mutex);
+
+ marker_update_probes();
+ if (old)
+ call_rcu_sched(&old->rcu, free_old_closure);
+ return ret;
+
+error_unregister_channel:
+ ret_err = ltt_channels_unregister(channel, 1);
+ WARN_ON(ret_err);
+error_remove_marker:
+ ret_err = remove_marker(channel, name, 0, 0);
+ WARN_ON(ret_err);
+end:
+ mutex_unlock(&markers_mutex);
+ marker_update_probes(); /* for compaction on error path */
+ return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_register);
+
+/**
+ * marker_probe_unregister - Disconnect a probe from a marker
+ * @channel: marker channel
+ * @name: marker name
+ * @probe: probe function pointer
+ * @probe_private: probe private data
+ *
+ * Returns 0 on success, or an error value.
+ * We do not need to call synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled
+ * section has finished.
+ */
+int marker_probe_unregister(const char *channel, const char *name,
+ marker_probe_func *probe, void *probe_private)
+{
+ struct marker_entry *entry;
+ struct marker_probe_array *old;
+ int ret = 0;
+
+ mutex_lock(&markers_mutex);
+ entry = get_marker(channel, name);
+ if (!entry) {
+ ret = -ENOENT;
+ goto end;
+ }
+ old = marker_entry_remove_probe(entry, probe, probe_private);
+ remove_marker(channel, name, 1, 0); /* Ignore busy error message */
+ mutex_unlock(&markers_mutex);
+
+ marker_update_probes();
+ if (old)
+ call_rcu_sched(&old->rcu, free_old_closure);
+ return ret;
+
+end:
+ mutex_unlock(&markers_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister);
+
+static struct marker_entry *
+get_marker_from_private_data(marker_probe_func *probe, void *probe_private)
+{
+ struct marker_entry *entry;
+ unsigned int i;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+ head = &marker_table[i];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ if (!entry->ptype) {
+ if (entry->single.func == probe
+ && entry->single.probe_private
+ == probe_private)
+ return entry;
+ } else {
+				struct marker_probe_array *closure;
+				unsigned int j;
+
+				closure = entry->multi;
+				/* Local index: do not clobber the outer i. */
+				for (j = 0; closure->c[j].func; j++) {
+					if (closure->c[j].func == probe &&
+					    closure->c[j].probe_private
+					    == probe_private)
+						return entry;
+				}
+ }
+ }
+ }
+ return NULL;
+}
+
+/**
+ * marker_probe_unregister_private_data - Disconnect a probe from a marker
+ * @probe: probe function
+ * @probe_private: probe private data
+ *
+ * Unregister a probe by providing the registered private data.
+ * Only removes the first marker found in hash table.
+ * Returns 0 on success, or an error value.
+ * We do not need to call synchronize_sched to make sure the probes have
+ * finished running before doing a module unload, because the module unload
+ * itself uses stop_machine(), which ensures that every preempt-disabled
+ * section has finished.
+ */
+int marker_probe_unregister_private_data(marker_probe_func *probe,
+ void *probe_private)
+{
+ struct marker_entry *entry;
+ int ret = 0;
+ struct marker_probe_array *old;
+ const char *channel = NULL, *name = NULL;
+
+ mutex_lock(&markers_mutex);
+ entry = get_marker_from_private_data(probe, probe_private);
+ if (!entry) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+ old = marker_entry_remove_probe(entry, NULL, probe_private);
+ channel = kstrdup(entry->channel, GFP_KERNEL);
+ name = kstrdup(entry->name, GFP_KERNEL);
+ remove_marker(channel, name, 1, 0); /* Ignore busy error message */
+ mutex_unlock(&markers_mutex);
+
+ marker_update_probes();
+ if (old)
+ call_rcu_sched(&old->rcu, free_old_closure);
+ goto end;
+
+unlock:
+ mutex_unlock(&markers_mutex);
+end:
+ kfree(channel);
+ kfree(name);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(marker_probe_unregister_private_data);
+
+/**
+ * marker_get_private_data - Get a marker's probe private data
+ * @channel: marker channel
+ * @name: marker name
+ * @probe: probe to match
+ * @num: get the nth matching probe's private data
+ *
+ * Returns the nth private data pointer (starting from 0) matching, or an
+ * ERR_PTR.
+ * The private data pointer should _only_ be dereferenced if the caller is the
+ * owner of the data, or its content could vanish. This is mostly used to
+ * confirm that a caller is the owner of a registered probe.
+ */
+void *marker_get_private_data(const char *channel, const char *name,
+ marker_probe_func *probe, int num)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct marker_entry *e;
+ size_t channel_len = strlen(channel) + 1;
+ size_t name_len = strlen(name) + 1;
+ int i;
+ u32 hash;
+
+ hash = jhash(channel, channel_len-1, 0) ^ jhash(name, name_len-1, 0);
+ head = &marker_table[hash & ((1 << MARKER_HASH_BITS)-1)];
+ hlist_for_each_entry(e, node, head, hlist) {
+ if (!strcmp(channel, e->channel) && !strcmp(name, e->name)) {
+ if (!e->ptype) {
+ if (num == 0 && e->single.func == probe)
+ return e->single.probe_private;
+ } else {
+ struct marker_probe_array *closure;
+ int match = 0;
+ closure = e->multi;
+ for (i = 0; closure->c[i].func; i++) {
+ if (closure->c[i].func != probe)
+ continue;
+ if (match++ == num)
+ return closure->c[i].probe_private;
+ }
+ }
+ break;
+ }
+ }
+ return ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(marker_get_private_data);
+
+static struct marker_entry *get_entry_from_id(u16 channel_id, u16 event_id)
+{
+ struct hlist_head *head;
+ struct hlist_node *node;
+ struct marker_entry *e, *found = NULL;
+ u32 hash = hash_32((channel_id << 16) | event_id, MARKER_HASH_BITS);
+
+ mutex_lock(&markers_mutex);
+ head = id_table + hash;
+ hlist_for_each_entry(e, node, head, id_list) {
+ if (e->channel_id == channel_id && e->event_id == event_id) {
+ found = e;
+ break;
+ }
+ }
+ mutex_unlock(&markers_mutex);
+ return found;
+}
+
+/* Must be called while the ids/marker_entry are kept alive. */
+const char *marker_get_name_from_id(u16 channel_id, u16 event_id)
+{
+ struct marker_entry *e = get_entry_from_id(channel_id, event_id);
+ return e ? e->name : NULL;
+}
+EXPORT_SYMBOL_GPL(marker_get_name_from_id);
+
+const char *marker_get_fmt_from_id(u16 channel_id, u16 event_id)
+{
+ struct marker_entry *e = get_entry_from_id(channel_id, event_id);
+ return e ? e->format : NULL;
+}
+EXPORT_SYMBOL_GPL(marker_get_fmt_from_id);
+
+/**
+ * markers_compact_event_ids - Compact markers event IDs and reassign channels
+ *
+ * Called when no channel users are active by the channel infrastructure.
+ * Called with trace lock, lock_markers() and channel mutex held.
+ *
+ * marker_update_probes() must be executed after compaction before releasing the
+ * trace lock.
+ */
+void markers_compact_event_ids(void)
+{
+ struct marker_entry *entry;
+ unsigned int i;
+ struct hlist_head *head;
+ struct hlist_node *node, *next;
+ int ret;
+
+ _ltt_channels_reset_event_ids();
+
+ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+ head = &marker_table[i];
+ hlist_for_each_entry_safe(entry, node, next, head, hlist) {
+ if (!entry->refcount) {
+ remove_marker(entry->channel, entry->name,
+ 1, 1);
+ continue;
+ }
+ ret = ltt_channels_get_index_from_name(entry->channel);
+ WARN_ON(ret < 0);
+ entry->channel_id = ret;
+ ret = _ltt_channels_get_event_id(entry->channel,
+ entry->name);
+ WARN_ON(ret < 0);
+ entry->event_id = ret;
+ }
+ }
+
+ memset(id_table, 0, sizeof(id_table));
+ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+ head = &marker_table[i];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ hlist_add_head(&entry->id_list, id_table + hash_32(
+ (entry->channel_id << 16)
+ | entry->event_id, MARKER_HASH_BITS));
+ }
+ }
+}
+
+#ifdef CONFIG_MODULES
+
+/**
+ * marker_get_iter_range - Get the next marker given a range.
+ * @marker: current markers (in), next marker (out)
+ * @begin: beginning of the range
+ * @end: end of the range
+ *
+ * Returns whether a next marker has been found (1) or not (0).
+ * Will return the first marker in the range if the input marker is NULL.
+ */
+int marker_get_iter_range(struct marker **marker, struct marker *begin,
+ struct marker *end)
+{
+ if (!*marker && begin != end) {
+ *marker = begin;
+ return 1;
+ }
+ if (*marker >= begin && *marker < end)
+ return 1;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(marker_get_iter_range);
+
+static void marker_get_iter(struct marker_iter *iter)
+{
+ int found = 0;
+
+ /* Core kernel markers */
+ if (!iter->module) {
+ found = marker_get_iter_range(&iter->marker,
+ __start___markers, __stop___markers);
+ if (found)
+ goto end;
+ }
+ /* Markers in modules. */
+ found = module_get_iter_markers(iter);
+end:
+ if (!found)
+ marker_iter_reset(iter);
+}
+
+void marker_iter_start(struct marker_iter *iter)
+{
+ marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_start);
+
+void marker_iter_next(struct marker_iter *iter)
+{
+ iter->marker++;
+ /*
+ * iter->marker may be invalid because we blindly incremented it.
+	 * Make sure it is valid by checking it against the marker sections,
+	 * moving on to the markers of the following modules if necessary.
+ */
+ marker_get_iter(iter);
+}
+EXPORT_SYMBOL_GPL(marker_iter_next);
+
+void marker_iter_stop(struct marker_iter *iter)
+{
+}
+EXPORT_SYMBOL_GPL(marker_iter_stop);
+
+void marker_iter_reset(struct marker_iter *iter)
+{
+ iter->module = NULL;
+ iter->marker = NULL;
+}
+EXPORT_SYMBOL_GPL(marker_iter_reset);
+
+int marker_module_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ struct module *mod = data;
+
+ switch (val) {
+ case MODULE_STATE_COMING:
+ marker_update_probe_range(mod->markers,
+ mod->markers + mod->num_markers);
+ break;
+ case MODULE_STATE_GOING:
+ marker_update_probe_range(mod->markers,
+ mod->markers + mod->num_markers);
+ break;
+ }
+ return 0;
+}
+
+struct notifier_block marker_module_nb = {
+ .notifier_call = marker_module_notify,
+ .priority = 0,
+};
+
+static int init_markers(void)
+{
+ return register_module_notifier(&marker_module_nb);
+}
+__initcall(init_markers);
+
+#endif /* CONFIG_MODULES */
+
+void ltt_dump_marker_state(struct ltt_trace *trace)
+{
+ struct marker_entry *entry;
+ struct ltt_probe_private_data call_data;
+ struct hlist_head *head;
+ struct hlist_node *node;
+ unsigned int i;
+
+ mutex_lock(&markers_mutex);
+ call_data.trace = trace;
+ call_data.serializer = NULL;
+
+ for (i = 0; i < MARKER_TABLE_SIZE; i++) {
+ head = &marker_table[i];
+ hlist_for_each_entry(entry, node, head, hlist) {
+ __trace_mark(0, metadata, core_marker_id,
+ &call_data,
+ "channel %s name %s event_id %hu "
+ "int #1u%zu long #1u%zu pointer #1u%zu "
+ "size_t #1u%zu alignment #1u%u",
+ entry->channel,
+ entry->name,
+ entry->event_id,
+ sizeof(int), sizeof(long),
+ sizeof(void *), sizeof(size_t),
+ ltt_get_alignment());
+ if (entry->format)
+ __trace_mark(0, metadata,
+ core_marker_format,
+ &call_data,
+ "channel %s name %s format %s",
+ entry->channel,
+ entry->name,
+ entry->format);
+ }
+ }
+ mutex_unlock(&markers_mutex);
+}
+EXPORT_SYMBOL_GPL(ltt_dump_marker_state);
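For reference, connecting a probe to a marker managed by this file looks roughly as follows. The probe prototype mirrors __mark_empty_function() above; the channel name, event name, and format string are illustrative and must match the corresponding trace_mark() site:

    #include <stdarg.h>
    #include <linux/marker.h>

    static void example_probe(const struct marker *mdata, void *probe_private,
			      void *call_private, const char *fmt,
			      va_list *args)
    {
	    /* Decode *args according to fmt, or ignore the payload. */
    }

    static int example_connect(void)
    {
	    return marker_probe_register("example_chan", "example_event",
					 "value %d", example_probe, NULL);
    }

    static void example_disconnect(void)
    {
	    marker_probe_unregister("example_chan", "example_event",
				    example_probe, NULL);
    }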
diff --git a/kernel/module.c b/kernel/module.c
index efa290ea94b..0ea12e0d472 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -56,6 +56,7 @@
#include <linux/percpu.h>
#include <linux/kmemleak.h>
#include <linux/jump_label.h>
+#include <trace/kernel.h>
#include <linux/pfn.h>
#define CREATE_TRACE_POINTS
@@ -99,7 +100,9 @@
* 1) List of modules (also safely readable with preempt_disable),
* 2) module_use links,
* 3) module_addr_min/module_addr_max.
- * (delete uses stop_machine/add uses RCU list operations). */
+ * (delete uses stop_machine/add uses RCU list operations).
+ * Sorted by ascending list node address.
+ */
DEFINE_MUTEX(module_mutex);
EXPORT_SYMBOL_GPL(module_mutex);
static LIST_HEAD(modules);
@@ -120,6 +123,9 @@ static BLOCKING_NOTIFIER_HEAD(module_notify_list);
* Protected by module_mutex. */
static unsigned long module_addr_min = -1UL, module_addr_max = 0;
+DEFINE_TRACE(kernel_module_load);
+DEFINE_TRACE(kernel_module_free);
+
int register_module_notifier(struct notifier_block * nb)
{
return blocking_notifier_chain_register(&module_notify_list, nb);
@@ -1675,6 +1681,7 @@ static inline void unset_section_ro_nx(struct module *mod, void *module_region)
/* Free a module, remove from lists, etc. */
static void free_module(struct module *mod)
{
+ trace_kernel_module_free(mod);
trace_module_free(mod);
/* Delete from various lists */
@@ -2272,6 +2279,12 @@ static int copy_and_check(struct load_info *info,
if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
return -ENOMEM;
+ /*
+	 * Make sure module text or data accesses never generate any page
+ * fault.
+ */
+ vmalloc_sync_all();
+
if (copy_from_user(hdr, umod, len) != 0) {
err = -EFAULT;
goto free_hdr;
@@ -2459,6 +2472,10 @@ static void find_module_sections(struct module *mod, struct load_info *info)
sizeof(*mod->ctors), &mod->num_ctors);
#endif
+#ifdef CONFIG_MARKERS
+ mod->markers = section_objs(info, "__markers",
+ sizeof(*mod->markers), &mod->num_markers);
+#endif
#ifdef CONFIG_TRACEPOINTS
mod->tracepoints_ptrs = section_objs(info, "__tracepoints_ptrs",
sizeof(*mod->tracepoints_ptrs),
@@ -2717,7 +2734,7 @@ static struct module *load_module(void __user *umod,
const char __user *uargs)
{
struct load_info info = { NULL, };
- struct module *mod;
+ struct module *mod, *iter;
long err;
DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
@@ -2799,7 +2816,23 @@ static struct module *load_module(void __user *umod,
goto ddebug;
module_bug_finalize(info.hdr, info.sechdrs, mod);
+ /*
+ * We sort the modules by struct module pointer address to permit
+	 * correct iteration over modules (at least by kallsyms) from
+	 * preemptible operations, such as read(). Sorting by struct module
+	 * pointer address is equivalent to sorting by list node address.
+ */
+ list_for_each_entry_reverse(iter, &modules, list) {
+ BUG_ON(iter == mod); /* Should never be in the list twice */
+ if (iter < mod) {
+ /* We belong to the location right after iter. */
+ list_add_rcu(&mod->list, &iter->list);
+ goto module_added;
+ }
+ }
+ /* We should be added at the head of the list */
list_add_rcu(&mod->list, &modules);
+module_added:
mutex_unlock(&module_mutex);
/* Module is ready to execute: parsing args may do that. */
@@ -2817,6 +2850,7 @@ static struct module *load_module(void __user *umod,
free_copy(&info);
/* Done! */
+ trace_kernel_module_load(mod);
trace_module_load(mod);
return mod;
@@ -3196,12 +3230,12 @@ static char *module_flags(struct module *mod, char *buf)
static void *m_start(struct seq_file *m, loff_t *pos)
{
mutex_lock(&module_mutex);
- return seq_list_start(&modules, *pos);
+ return seq_sorted_list_start(&modules, pos);
}
static void *m_next(struct seq_file *m, void *p, loff_t *pos)
{
- return seq_list_next(p, &modules, pos);
+ return seq_sorted_list_next(p, &modules, pos);
}
static void m_stop(struct seq_file *m, void *p)
@@ -3266,6 +3300,27 @@ static int __init proc_modules_init(void)
module_init(proc_modules_init);
#endif
+void list_modules(void *call_data)
+{
+ /* Enumerate loaded modules */
+ struct list_head *i;
+ struct module *mod;
+ unsigned long refcount = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each(i, &modules) {
+ mod = list_entry(i, struct module, list);
+#ifdef CONFIG_MODULE_UNLOAD
+ refcount = module_refcount(mod);
+#endif
+ __trace_mark(0, module_state, list_module, call_data,
+ "name %s state %d refcount %lu",
+ mod->name, mod->state, refcount);
+ }
+ mutex_unlock(&module_mutex);
+}
+EXPORT_SYMBOL_GPL(list_modules);
+
/* Given an address, look for it in the module exception tables. */
const struct exception_table_entry *search_module_extables(unsigned long addr)
{
@@ -3393,12 +3448,59 @@ void module_layout(struct module *mod,
struct modversion_info *ver,
struct kernel_param *kp,
struct kernel_symbol *ks,
+ struct marker *marker,
struct tracepoint * const *tp)
{
}
EXPORT_SYMBOL(module_layout);
#endif
+#ifdef CONFIG_MARKERS
+void module_update_markers(void)
+{
+ struct module *mod;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list)
+ if (!(mod->taints & TAINT_FORCED_MODULE))
+ marker_update_probe_range(mod->markers,
+ mod->markers + mod->num_markers);
+ mutex_unlock(&module_mutex);
+}
+
+/*
+ * Returns 1 if the iterator's current marker was found, 0 otherwise.
+ */
+int module_get_iter_markers(struct marker_iter *iter)
+{
+ struct module *iter_mod;
+ int found = 0;
+
+ mutex_lock(&module_mutex);
+ list_for_each_entry(iter_mod, &modules, list) {
+ if (!(iter_mod->taints & TAINT_FORCED_MODULE)) {
+ /*
+ * Sorted module list
+ */
+ if (iter_mod < iter->module)
+ continue;
+ else if (iter_mod > iter->module)
+ iter->marker = NULL;
+ found = marker_get_iter_range(&iter->marker,
+ iter_mod->markers,
+ iter_mod->markers + iter_mod->num_markers);
+ if (found) {
+ iter->module = iter_mod;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&module_mutex);
+ return found;
+}
+#endif
+
#ifdef CONFIG_TRACEPOINTS
void module_update_tracepoints(void)
{
diff --git a/kernel/notifier.c b/kernel/notifier.c
index 2488ba7eb56..e8481427153 100644
--- a/kernel/notifier.c
+++ b/kernel/notifier.c
@@ -5,6 +5,7 @@
#include <linux/rcupdate.h>
#include <linux/vmalloc.h>
#include <linux/reboot.h>
+#include <linux/idle.h>
/*
* Notifier list for kernel code which wants to be called
@@ -148,7 +149,7 @@ int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
spin_lock_irqsave(&nh->lock, flags);
ret = notifier_chain_unregister(&nh->head, n);
spin_unlock_irqrestore(&nh->lock, flags);
- synchronize_rcu();
+ synchronize_sched();
return ret;
}
EXPORT_SYMBOL_GPL(atomic_notifier_chain_unregister);
@@ -178,9 +179,9 @@ int __kprobes __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
{
int ret;
- rcu_read_lock();
+ rcu_read_lock_sched_notrace();
ret = notifier_call_chain(&nh->head, val, v, nr_to_call, nr_calls);
- rcu_read_unlock();
+ rcu_read_unlock_sched_notrace();
return ret;
}
EXPORT_SYMBOL_GPL(__atomic_notifier_call_chain);
@@ -584,3 +585,27 @@ int unregister_die_notifier(struct notifier_block *nb)
return atomic_notifier_chain_unregister(&die_chain, nb);
}
EXPORT_SYMBOL_GPL(unregister_die_notifier);
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+/*
+ * Trace last event before calling notifiers. Notifiers flush data from buffers
+ * before going to idle.
+ */
+int notrace notify_idle(enum idle_val val)
+{
+ return atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(notify_idle);
+
+void register_idle_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(register_idle_notifier);
+
+void unregister_idle_notifier(struct notifier_block *n)
+{
+ atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(unregister_idle_notifier);
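The new idle chain is consumed like any other atomic notifier. A sketch of a client that flushes trace data on idle transitions; the enum idle_val values live in the new <linux/idle.h>, which is outside this hunk, so the callback below only forwards the raw value:

    #include <linux/idle.h>
    #include <linux/notifier.h>

    static int example_idle_notify(struct notifier_block *nb,
				   unsigned long val, void *unused)
    {
	    /* val is the enum idle_val event; flush per-cpu buffers here. */
	    return NOTIFY_OK;
    }

    static struct notifier_block example_idle_nb = {
	    .notifier_call = example_idle_notify,
    };

    /* register_idle_notifier(&example_idle_nb) on setup,
     * unregister_idle_notifier(&example_idle_nb) on teardown. */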
diff --git a/kernel/panic.c b/kernel/panic.c
index 991bb87a170..3fd05f5708c 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -23,6 +23,9 @@
#include <linux/init.h>
#include <linux/nmi.h>
#include <linux/dmi.h>
+#include <trace/kernel.h>
+
+DEFINE_TRACE(kernel_panic);
#define PANIC_TIMER_STEP 100
#define PANIC_BLINK_SPD 18
@@ -64,6 +67,10 @@ NORET_TYPE void panic(const char * fmt, ...)
long i, i_next = 0;
int state = 0;
+ va_start(args, fmt);
+ trace_kernel_panic(fmt, args);
+ va_end(args);
+
/*
* It's possible to come here directly from a panic-assertion and
* not have preempt disabled. Some functions called from here want
diff --git a/kernel/printk.c b/kernel/printk.c
index 36231525e22..5b87a0ce089 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -39,6 +39,7 @@
#include <linux/syslog.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
+#include <trace/kernel.h>
#include <linux/rculist.h>
#include <asm/uaccess.h>
@@ -67,6 +68,7 @@ int console_printk[4] = {
MINIMUM_CONSOLE_LOGLEVEL, /* minimum_console_loglevel */
DEFAULT_CONSOLE_LOGLEVEL, /* default_console_loglevel */
};
+EXPORT_SYMBOL_GPL(console_printk);
/*
* Low level drivers may need that to know if they can schedule in
@@ -136,6 +138,9 @@ EXPORT_SYMBOL(console_set_on_cmdline);
/* Flag: console code may call schedule() */
static int console_may_schedule;
+DEFINE_TRACE(kernel_printk);
+DEFINE_TRACE(kernel_vprintk);
+
#ifdef CONFIG_PRINTK
static char __log_buf[__LOG_BUF_LEN];
@@ -650,6 +655,7 @@ asmlinkage int printk(const char *fmt, ...)
}
#endif
va_start(args, fmt);
+ trace_kernel_printk(_RET_IP_);
r = vprintk(fmt, args);
va_end(args);
@@ -773,6 +779,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
printed_len += vscnprintf(printk_buf + printed_len,
sizeof(printk_buf) - printed_len, fmt, args);
+ trace_kernel_vprintk(_RET_IP_, printk_buf, printed_len);
p = printk_buf;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index dd4aea806f8..a86e46b6bc1 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -47,6 +47,7 @@
#include <linux/mutex.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
+#include <trace/rcu.h>
#include "rcutree.h"
@@ -145,6 +146,10 @@ int rcu_cpu_stall_suppress __read_mostly = RCU_CPU_STALL_SUPPRESS_INIT;
module_param(rcu_cpu_stall_suppress, int, 0644);
#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+DEFINE_TRACE(rcu_tree_call_rcu);
+DEFINE_TRACE(rcu_tree_call_rcu_bh);
+DEFINE_TRACE(rcu_tree_callback);
+
static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
static int rcu_pending(int cpu);
@@ -1143,6 +1148,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
next = list->next;
prefetch(next);
debug_rcu_head_unqueue(list);
+ trace_rcu_tree_callback(list);
list->func(list);
list = next;
if (++count >= rdp->blimit)
@@ -1488,6 +1494,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
*/
void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
{
+ trace_rcu_tree_call_rcu_bh(head, _RET_IP_);
__call_rcu(head, func, &rcu_bh_state);
}
EXPORT_SYMBOL_GPL(call_rcu_bh);
diff --git a/kernel/sched.c b/kernel/sched.c
index 18d38e4ec7b..44d889a4d48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -9254,3 +9254,57 @@ struct cgroup_subsys cpuacct_subsys = {
};
#endif /* CONFIG_CGROUP_CPUACCT */
+static DEFINE_MUTEX(kernel_trace_mutex);
+static int kernel_trace_refcount;
+
+/**
+ * clear_kernel_trace_flag_all_tasks - clears all TIF_KERNEL_TRACE thread flags.
+ *
+ * This function iterates over all threads in the system to clear their
+ * TIF_KERNEL_TRACE flag. Because copy_process() sets TIF_KERNEL_TRACE with
+ * the tasklist_lock held, all threads are guaranteed to have the flag
+ * cleared once this function returns.
+ */
+void clear_kernel_trace_flag_all_tasks(void)
+{
+ struct task_struct *p;
+ struct task_struct *t;
+
+ mutex_lock(&kernel_trace_mutex);
+ if (--kernel_trace_refcount)
+ goto end;
+ read_lock(&tasklist_lock);
+ do_each_thread(p, t) {
+ clear_tsk_thread_flag(t, TIF_KERNEL_TRACE);
+ } while_each_thread(p, t);
+ read_unlock(&tasklist_lock);
+end:
+ mutex_unlock(&kernel_trace_mutex);
+}
+EXPORT_SYMBOL_GPL(clear_kernel_trace_flag_all_tasks);
+
+/**
+ * set_kernel_trace_flag_all_tasks - sets all TIF_KERNEL_TRACE thread flags.
+ *
+ * This function iterates over all threads in the system to set their
+ * TIF_KERNEL_TRACE flag. Because copy_process() sets TIF_KERNEL_TRACE with
+ * the tasklist_lock held, all threads are guaranteed to have the flag set
+ * once this function returns.
+ */
+void set_kernel_trace_flag_all_tasks(void)
+{
+ struct task_struct *p;
+ struct task_struct *t;
+
+ mutex_lock(&kernel_trace_mutex);
+ if (kernel_trace_refcount++)
+ goto end;
+ read_lock(&tasklist_lock);
+ do_each_thread(p, t) {
+ set_tsk_thread_flag(t, TIF_KERNEL_TRACE);
+ } while_each_thread(p, t);
+ read_unlock(&tasklist_lock);
+end:
+ mutex_unlock(&kernel_trace_mutex);
+}
+EXPORT_SYMBOL_GPL(set_kernel_trace_flag_all_tasks);
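
Both helpers are refcounted: the task list is walked only on the 0 -> 1
transition of set_kernel_trace_flag_all_tasks() and on the final decrement of
the clear side, so nested tracing sessions are safe. A sketch of the assumed
calling convention (the session wrappers are hypothetical):

static void my_trace_session_start(void)
{
	/* First session walks the task list and sets TIF_KERNEL_TRACE. */
	set_kernel_trace_flag_all_tasks();
}

static void my_trace_session_stop(void)
{
	/* Flags are cleared only when the last session ends. */
	clear_kernel_trace_flag_all_tasks();
}
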
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 68eb5efec38..a25bf611d13 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -23,7 +23,10 @@
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
+#include <linux/marker.h>
+#include <linux/kallsyms.h>
#include <linux/tick.h>
+#include <trace/irq.h>
#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>
@@ -54,6 +57,20 @@ EXPORT_SYMBOL(irq_stat);
static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;
+void ltt_dump_softirq_vec(void *call_data)
+{
+ int i;
+ char namebuf[KSYM_NAME_LEN];
+
+ for (i = 0; i < NR_SOFTIRQS; i++) {
+ sprint_symbol(namebuf, (unsigned long)softirq_vec[i].action);
+ __trace_mark(0, softirq_state, softirq_vec, call_data,
+ "id %d address %p symbol %s",
+ i, softirq_vec[i].action, namebuf);
+ }
+}
+EXPORT_SYMBOL_GPL(ltt_dump_softirq_vec);
+
static DEFINE_PER_CPU(struct task_struct *, ksoftirqd);
char *softirq_to_name[NR_SOFTIRQS] = {
@@ -61,6 +78,11 @@ char *softirq_to_name[NR_SOFTIRQS] = {
"TASKLET", "SCHED", "HRTIMER", "RCU"
};
+DEFINE_TRACE(irq_tasklet_high_entry);
+DEFINE_TRACE(irq_tasklet_high_exit);
+DEFINE_TRACE(irq_tasklet_low_entry);
+DEFINE_TRACE(irq_tasklet_low_exit);
+
/*
* we cannot loop indefinitely here to avoid userspace starvation,
* but we also don't want to introduce a worst case 1/HZ latency
@@ -341,6 +363,7 @@ void irq_exit(void)
*/
inline void raise_softirq_irqoff(unsigned int nr)
{
+ trace_softirq_raise(nr);
__raise_softirq_irqoff(nr);
/*
@@ -440,7 +463,9 @@ static void tasklet_action(struct softirq_action *a)
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
+ trace_irq_tasklet_low_entry(t);
t->func(t->data);
+ trace_irq_tasklet_low_exit(t);
tasklet_unlock(t);
continue;
}
@@ -475,7 +500,9 @@ static void tasklet_hi_action(struct softirq_action *a)
if (!atomic_read(&t->count)) {
if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
BUG();
+ trace_irq_tasklet_high_entry(t);
t->func(t->data);
+ trace_irq_tasklet_high_exit(t);
tasklet_unlock(t);
continue;
}
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index ee266620b06..dbaa0648631 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += tick-broadcast.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o
obj-$(CONFIG_TICK_ONESHOT) += tick-sched.o
obj-$(CONFIG_TIMER_STATS) += timer_stats.o
+obj-$(CONFIG_HAVE_UNSYNCHRONIZED_TSC) += tsc-sync.o
diff --git a/kernel/time/tsc-sync.c b/kernel/time/tsc-sync.c
new file mode 100644
index 00000000000..2ac1544ee22
--- /dev/null
+++ b/kernel/time/tsc-sync.c
@@ -0,0 +1,313 @@
+/*
+ * kernel/time/tsc-sync.c
+ *
+ * Test TSC synchronization
+ *
+ * Marks the TSC as unstable _and_ keeps a simple "_tsc_is_sync" variable,
+ * which is fast to read when a simple test must determine which clock source
+ * to use for kernel tracing.
+ *
+ * - CPU init :
+ *
+ * We check whether all boot CPUs have their TSC's synchronized,
+ * print a warning if not and turn off the TSC clock-source.
+ *
+ * Only two CPUs may participate - they can enter in any order.
+ * (The serial nature of the boot logic and the CPU hotplug lock
+ * protects against more than 2 CPUs entering this code.)
+ *
+ * - When CPUs are up :
+ *
+ * TSC synchronicity of all CPUs can be checked later at run-time by calling
+ * test_tsc_synchronization().
+ *
+ * Copyright 2007, 2008
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/timex.h>
+#include <linux/jiffies.h>
+#include <linux/trace-clock.h>
+#include <linux/cpu.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+
+#define MAX_CYCLES_DELTA 3000ULL
+
+/*
+ * Number of loops to take care of MCE, NMIs, SMIs.
+ */
+#define NR_LOOPS 200
+
+static DEFINE_MUTEX(tscsync_mutex);
+
+struct sync_data {
+ int nr_waits;
+ int wait_sync;
+ cycles_t tsc_count;
+} ____cacheline_aligned;
+
+/* 0 is master, 1 is slave */
+static struct sync_data sync_data[2] = {
+ [0 ... 1] = {
+ .nr_waits = 3 * NR_LOOPS + 1,
+ .wait_sync = 3 * NR_LOOPS + 1,
+ },
+};
+
+int _tsc_is_sync = 1;
+EXPORT_SYMBOL(_tsc_is_sync);
+
+static int force_tsc_sync;
+static cycles_t slave_offset;
+static int slave_offset_ready; /* for 32-bit architectures */
+
+static int __init force_tsc_sync_setup(char *str)
+{
+ force_tsc_sync = simple_strtoul(str, NULL, 0);
+ return 1;
+}
+__setup("force_tsc_sync=", force_tsc_sync_setup);
+
+/*
+ * Mark it noinline so we make sure it is not unrolled.
+ * Wait until value is reached.
+ */
+static noinline void tsc_barrier(long this_cpu)
+{
+ sync_core();
+ sync_data[this_cpu].wait_sync--;
+ smp_mb(); /* order master/slave sync_data read/write */
+ while (unlikely(sync_data[1 - this_cpu].wait_sync >=
+ sync_data[this_cpu].nr_waits))
+ barrier(); /*
+ * barrier() is used because it is faster
+ * and more predictable than cpu_idle().
+ */
+ smp_mb(); /* order master/slave sync_data read/write */
+ sync_data[this_cpu].nr_waits--;
+ get_cycles_barrier();
+ sync_data[this_cpu].tsc_count = get_cycles();
+ get_cycles_barrier();
+}
+
+/*
+ * Worker thread called on each CPU.
+ * First wait with interrupts enabled, then wait with interrupts disabled,
+ * for precision. We are already bound to one CPU.
+ * this_cpu 0 : master
+ * this_cpu 1 : slave
+ */
+static void test_sync(void *arg)
+{
+ long this_cpu = (long)arg;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ /* Make sure the instructions are in I-CACHE */
+ tsc_barrier(this_cpu);
+ tsc_barrier(this_cpu);
+ sync_data[this_cpu].wait_sync--;
+ smp_mb(); /* order master/slave sync_data read/write */
+ while (unlikely(sync_data[1 - this_cpu].wait_sync >=
+ sync_data[this_cpu].nr_waits))
+ barrier(); /*
+ * barrier() is used because it is faster
+ * and more predictable than cpu_idle().
+ */
+ smp_mb(); /* order master/slave sync_data read/write */
+ sync_data[this_cpu].nr_waits--;
+ /*
+ * Here, only the master will wait for the slave to reach this barrier.
+ * This makes sure that the master, which holds the mutex and will reset
+ * the barriers, waits for the slave to stop using the barrier values
+ * before it continues. This is only done at the very end of all the
+ * loops, which is why there is a + 1 in the original wait_sync value.
+ */
+ if (sync_data[this_cpu].nr_waits == 1)
+ sync_data[this_cpu].wait_sync--;
+ local_irq_restore(flags);
+}
+
+/*
+ * Each CPU (master and target) must decrement the wait_sync value twice (once
+ * for cache priming), and also once after the get_cycles. After all the
+ * loops, one last synchronization is required to make sure the master waits
+ * for the slave before resetting the barriers.
+ */
+static void reset_barriers(void)
+{
+ int i;
+
+ /*
+ * Wait until slave is done so that we don't overwrite
+ * wait_end_sync prematurely.
+ */
+ smp_mb(); /* order master/slave sync_data read/write */
+ while (unlikely(sync_data[1].wait_sync >= sync_data[0].nr_waits))
+ barrier(); /*
+ * barrier() is used because it is faster
+ * and more predictable than cpu_idle().
+ */
+ smp_mb(); /* order master/slave sync_data read/write */
+
+ for (i = 0; i < 2; i++) {
+ WARN_ON(sync_data[i].wait_sync != 0);
+ WARN_ON(sync_data[i].nr_waits != 1);
+ sync_data[i].wait_sync = 3 * NR_LOOPS + 1;
+ sync_data[i].nr_waits = 3 * NR_LOOPS + 1;
+ }
+}
+
+/*
+ * Run the loops (making sure no unexpected event changes the timing) and
+ * keep the best one. The result of each loop is the highest TSC delta
+ * between the master CPU and the slaves. Disable CPU hotplug while this code
+ * executes so we stay concurrency-safe wrt the hotplug path, which also
+ * uses this code. Test TSC
+ * synchronization even if we already "know" CPUs were not synchronized. This
+ * can be used as a test to check if, for some reason, the CPUs eventually got
+ * in sync after a CPU has been unplugged. This code is kept separate from the
+ * CPU hotplug code because the slave CPU executes in an IPI, which we want to
+ * keep as short as possible (this is happening while the system is running).
+ * Therefore, we do not send a single IPI for all the test loops, but rather
+ * send one IPI per loop.
+ */
+int test_tsc_synchronization(void)
+{
+ long cpu, master;
+ cycles_t max_diff = 0, diff, best_loop, worse_loop = 0;
+ int i;
+
+ mutex_lock(&tscsync_mutex);
+ get_online_cpus();
+
+ printk(KERN_INFO
+ "checking TSC synchronization across all online CPUs:");
+
+ preempt_disable();
+ master = smp_processor_id();
+ for_each_online_cpu(cpu) {
+ if (master == cpu)
+ continue;
+ best_loop = (cycles_t)ULLONG_MAX;
+ for (i = 0; i < NR_LOOPS; i++) {
+ smp_call_function_single(cpu, test_sync,
+ (void *)1UL, 0);
+ test_sync((void *)0UL);
+ diff = abs(sync_data[1].tsc_count
+ - sync_data[0].tsc_count);
+ best_loop = min(best_loop, diff);
+ worse_loop = max(worse_loop, diff);
+ }
+ reset_barriers();
+ max_diff = max(best_loop, max_diff);
+ }
+ preempt_enable();
+ if (max_diff >= MAX_CYCLES_DELTA) {
+ printk(KERN_WARNING
+ "Measured %llu cycles TSC offset between CPUs,"
+ " turning off TSC clock.\n", (u64)max_diff);
+ mark_tsc_unstable("check_tsc_sync_source failed");
+ _tsc_is_sync = 0;
+ } else {
+ printk(" passed.\n");
+ }
+ put_online_cpus();
+ mutex_unlock(&tscsync_mutex);
+ return max_diff < MAX_CYCLES_DELTA;
+}
+EXPORT_SYMBOL_GPL(test_tsc_synchronization);
+
+/*
+ * Test synchronicity of a single core when it is hotplugged.
+ * Source CPU calls into this - waits for the freshly booted target CPU to
+ * arrive and then start the measurement:
+ */
+void __cpuinit check_tsc_sync_source(int cpu)
+{
+ cycles_t diff, abs_diff,
+ best_loop = (cycles_t)ULLONG_MAX, worse_loop = 0;
+ int i;
+
+ /*
+ * No need to check if we already know that the TSC is not synchronized:
+ */
+ if (!force_tsc_sync && unsynchronized_tsc()) {
+ /*
+ * Make sure we set _tsc_is_sync to 0 if the TSC is found
+ * to be unsynchronized for causes other than non-synchronized
+ * TSCs across CPUs.
+ */
+ _tsc_is_sync = 0;
+ set_trace_clock_is_sync(0);
+ return;
+ }
+
+ printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:",
+ smp_processor_id(), cpu);
+
+ for (i = 0; i < NR_LOOPS; i++) {
+ test_sync((void *)0UL);
+ diff = sync_data[1].tsc_count - sync_data[0].tsc_count;
+ abs_diff = abs(diff);
+ best_loop = min(best_loop, abs_diff);
+ worse_loop = max(worse_loop, abs_diff);
+ if (force_tsc_sync && best_loop == abs_diff)
+ slave_offset = diff;
+ }
+ reset_barriers();
+
+ if (!force_tsc_sync && best_loop >= MAX_CYCLES_DELTA) {
+ printk(" failed.\n");
+ printk(KERN_WARNING
+ "Measured %llu cycles TSC offset between CPUs,"
+ " turning off TSC clock.\n", (u64)best_loop);
+ mark_tsc_unstable("check_tsc_sync_source failed");
+ _tsc_is_sync = 0;
+ set_trace_clock_is_sync(0);
+ } else {
+ printk(" %s.\n", !force_tsc_sync ? "passed" : "forced");
+ }
+ if (force_tsc_sync) {
+ /* order slave_offset and slave_offset_ready writes */
+ smp_wmb();
+ slave_offset_ready = 1;
+ }
+}
+
+/*
+ * Freshly booted CPUs call into this:
+ */
+void __cpuinit check_tsc_sync_target(void)
+{
+ int i;
+
+ if (!force_tsc_sync && unsynchronized_tsc())
+ return;
+
+ for (i = 0; i < NR_LOOPS; i++)
+ test_sync((void *)1UL);
+
+ /*
+ * Force slave synchronization if requested.
+ */
+ if (force_tsc_sync) {
+ unsigned long flags;
+ cycles_t new_tsc;
+
+ while (!slave_offset_ready)
+ cpu_relax();
+ /* order slave_offset and slave_offset_ready reads */
+ smp_rmb();
+ local_irq_save(flags);
+ /*
+ * slave_offset is read when master has finished writing to it,
+ * and is protected by cpu hotplug serialization.
+ */
+ new_tsc = get_cycles() - slave_offset;
+ write_tsc((u32)new_tsc, (u32)((u64)new_tsc >> 32));
+ local_irq_restore(flags);
+ }
+}
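
test_tsc_synchronization() returns nonzero exactly when the worst measured
inter-CPU delta stayed below MAX_CYCLES_DELTA, clearing _tsc_is_sync on
failure so later fast-path checks are a plain load. A hedged sketch of a
caller deciding whether the TSC may back the trace clock (the tracer function
is hypothetical):

static void my_tracer_init_clock(void)
{
	if (test_tsc_synchronization())
		return;		/* TSC-based trace clock is safe to use */
	/*
	 * The TSC was marked unstable and _tsc_is_sync cleared; fall back
	 * to a slower source such as the generic trace clock.
	 */
}
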
diff --git a/kernel/timer.c b/kernel/timer.c
index d6459923d24..65cc58ce148 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -40,12 +40,14 @@
#include <linux/irq_work.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <trace/timer.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>
+#include <asm/irq_regs.h>
#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>
@@ -54,6 +56,10 @@ u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
EXPORT_SYMBOL(jiffies_64);
+DEFINE_TRACE(timer_set);
+DEFINE_TRACE(timer_update_time);
+DEFINE_TRACE(timer_timeout);
+
/*
* per-CPU timer vector definitions:
*/
@@ -366,6 +372,7 @@ static void internal_add_timer(struct tvec_base *base, struct timer_list *timer)
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
vec = base->tv5.vec + i;
}
+ trace_timer_set(timer);
/*
* Timers are FIFO:
*/
@@ -1303,8 +1310,13 @@ void run_local_timers(void)
void do_timer(unsigned long ticks)
{
+ struct timespec curtime, wtom;
+
jiffies_64 += ticks;
update_wall_time();
+ curtime = __current_kernel_time();
+ wtom = __get_wall_to_monotonic();
+ trace_timer_update_time(&curtime, &wtom);
calc_global_load(ticks);
}
@@ -1387,7 +1399,9 @@ SYSCALL_DEFINE0(getegid)
static void process_timeout(unsigned long __data)
{
- wake_up_process((struct task_struct *)__data);
+ struct task_struct *task = (struct task_struct *)__data;
+ trace_timer_timeout(task);
+ wake_up_process(task);
}
/**
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c..614d9153a24 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -56,5 +56,7 @@ obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
endif
+obj-$(CONFIG_HAVE_TRACE_CLOCK_32_TO_64) += trace-clock-32-to-64.o
+obj-$(CONFIG_HAVE_TRACE_CLOCK_GENERIC) += trace-clock.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace-clock-32-to-64.c b/kernel/trace/trace-clock-32-to-64.c
new file mode 100644
index 00000000000..843749a72ac
--- /dev/null
+++ b/kernel/trace/trace-clock-32-to-64.c
@@ -0,0 +1,295 @@
+/*
+ * kernel/trace/trace-clock-32-to-64.c
+ *
+ * (C) Copyright 2006,2007,2008 -
+ * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Extends a 32-bit clock source to a full 64-bit count, readable atomically
+ * from any execution context.
+ *
+ * Notes :
+ * - The timer-based 32->64 bit extended trace clock cannot be used for early
+ * tracing in the boot process, as it depends on timer interrupts.
+ * - The timer is only on one CPU to support hotplug.
+ * - We have the choice between schedule_delayed_work_on and an IPI to get each
+ * CPU to write the heartbeat. IPI has been chosen because it is considered
+ * faster than passing through the timer to get the work scheduled on all the
+ * CPUs.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <linux/timex.h>
+#include <linux/bitops.h>
+#include <linux/trace-clock.h>
+#include <linux/smp.h>
+#include <linux/sched.h> /* needed due to include order problem on m68k */
+#include <linux/math64.h>
+
+#define HW_BITMASK ((1ULL << TC_HW_BITS) - 1)
+#define HW_LS32(hw) ((hw) & HW_BITMASK)
+#define SW_MS32(sw) ((sw) & ~HW_BITMASK)
+
+static DEFINE_SPINLOCK(synthetic_tsc_lock);
+static int synthetic_tsc_refcount; /* Number of readers */
+static int synthetic_tsc_enabled; /* synth. TSC enabled on all online CPUs */
+
+static DEFINE_PER_CPU(struct timer_list, tsc_timer);
+static unsigned int precalc_expire;
+
+struct synthetic_tsc_struct {
+ union {
+ u64 val;
+ struct {
+#ifdef __BIG_ENDIAN
+ u32 ms32;
+ u32 ls32;
+#else
+ u32 ls32;
+ u32 ms32;
+#endif
+ } sel;
+ } tsc[2];
+ unsigned int index; /* Index of the current synth. tsc. */
+};
+
+static DEFINE_PER_CPU(struct synthetic_tsc_struct, synthetic_tsc);
+
+/* Called from IPI or timer interrupt */
+static void update_synthetic_tsc(void)
+{
+ struct synthetic_tsc_struct *cpu_synth;
+ u32 tsc;
+
+ cpu_synth = &per_cpu(synthetic_tsc, smp_processor_id());
+ tsc = trace_clock_read32(); /* Hardware clocksource read */
+
+ if (tsc < HW_LS32(cpu_synth->tsc[cpu_synth->index].sel.ls32)) {
+ unsigned int new_index = 1 - cpu_synth->index; /* 0 <-> 1 */
+ /*
+ * Overflow
+ * Non-atomic update of the non-current synthetic TSC, followed
+ * by an atomic index change. There is no write concurrency,
+ * so the index read/write does not need to be atomic.
+ */
+ cpu_synth->tsc[new_index].val =
+ (SW_MS32(cpu_synth->tsc[cpu_synth->index].val)
+ | (u64)tsc) + (1ULL << TC_HW_BITS);
+ /*
+ * Ensure the compiler does not reorder index write. It makes
+ * sure all nested interrupts will see the new value before the
+ * new index is written.
+ */
+ barrier();
+ cpu_synth->index = new_index; /* atomic change of index */
+ } else {
+ /*
+ * No overflow : we know that the only bits that changed are
+ * contained in the least-significant 32 bits, which can be
+ * written to atomically.
+ */
+ cpu_synth->tsc[cpu_synth->index].sel.ls32 =
+ SW_MS32(cpu_synth->tsc[cpu_synth->index].sel.ls32) | tsc;
+ }
+}
+
+/*
+ * Should only be called when interrupts are off. Affects only current CPU.
+ */
+void _trace_clock_write_synthetic_tsc(u64 value)
+{
+ struct synthetic_tsc_struct *cpu_synth;
+ unsigned int new_index;
+
+ cpu_synth = &per_cpu(synthetic_tsc, smp_processor_id());
+ new_index = 1 - cpu_synth->index; /* 0 <-> 1 */
+ cpu_synth->tsc[new_index].val = value;
+ barrier();
+ cpu_synth->index = new_index; /* atomic change of index */
+}
+
+/* Called from buffer switch : in _any_ context (even NMI) */
+u64 notrace trace_clock_read_synthetic_tsc(void)
+{
+ struct synthetic_tsc_struct *cpu_synth;
+ u64 ret;
+ unsigned int index;
+ u32 tsc;
+
+ preempt_disable_notrace();
+ cpu_synth = &per_cpu(synthetic_tsc, smp_processor_id());
+ index = ACCESS_ONCE(cpu_synth->index); /* atomic read */
+ tsc = trace_clock_read32(); /* Hardware clocksource read */
+
+ /* Overflow detection */
+ if (unlikely(tsc < HW_LS32(cpu_synth->tsc[index].sel.ls32)))
+ ret = (SW_MS32(cpu_synth->tsc[index].val) | (u64)tsc)
+ + (1ULL << TC_HW_BITS);
+ else
+ ret = SW_MS32(cpu_synth->tsc[index].val) | (u64)tsc;
+ preempt_enable_notrace();
+ return ret;
+}
+EXPORT_SYMBOL_GPL(trace_clock_read_synthetic_tsc);
+
+static void synthetic_tsc_ipi(void *info)
+{
+ update_synthetic_tsc();
+}
+
+/*
+ * tsc_timer_fct - timer function synchronizing the synthetic TSC.
+ * @data: unused
+ *
+ * Guarantees at least one execution before the low word of the TSC wraps.
+ */
+static void tsc_timer_fct(unsigned long data)
+{
+ update_synthetic_tsc();
+
+ mod_timer_pinned(&per_cpu(tsc_timer, smp_processor_id()),
+ jiffies + precalc_expire);
+}
+
+/*
+ * precalc_stsc_interval - precalculates the interval between hardware
+ * clock wraparounds.
+ */
+static int __init precalc_stsc_interval(void)
+{
+ u64 rem_freq, rem_interval;
+
+ precalc_expire =
+ __iter_div_u64_rem(HW_BITMASK, (
+ __iter_div_u64_rem(trace_clock_frequency(),
+ HZ * trace_clock_freq_scale(), &rem_freq) << 1
+ )
+ - 1
+ - (TC_EXPECTED_INTERRUPT_LATENCY * HZ / 1000), &rem_interval)
+ >> 1;
+ WARN_ON(precalc_expire == 0);
+ printk(KERN_DEBUG "Synthetic TSC timer will fire each %u jiffies.\n",
+ precalc_expire);
+ return 0;
+}
+
+static void prepare_synthetic_tsc(int cpu)
+{
+ struct synthetic_tsc_struct *cpu_synth;
+ u64 local_count;
+
+ cpu_synth = &per_cpu(synthetic_tsc, cpu);
+ local_count = trace_clock_read_synthetic_tsc();
+ cpu_synth->tsc[0].val = local_count;
+ cpu_synth->index = 0;
+ smp_wmb(); /* Writing in data of CPU about to come up */
+ init_timer_deferrable(&per_cpu(tsc_timer, cpu));
+ per_cpu(tsc_timer, cpu).function = tsc_timer_fct;
+ per_cpu(tsc_timer, cpu).expires = jiffies + precalc_expire;
+}
+
+static void enable_synthetic_tsc(int cpu)
+{
+ smp_call_function_single(cpu, synthetic_tsc_ipi, NULL, 1);
+ add_timer_on(&per_cpu(tsc_timer, cpu), cpu);
+}
+
+static void disable_synthetic_tsc(int cpu)
+{
+ del_timer_sync(&per_cpu(tsc_timer, cpu));
+}
+
+/*
+ * hotcpu_callback - CPU hotplug callback
+ * @nb: notifier block
+ * @action: hotplug action to take
+ * @hcpu: CPU number
+ *
+ * Sets the new CPU's current synthetic TSC to the same value as the
+ * currently running CPU.
+ *
+ * Returns the success/failure of the operation. (NOTIFY_OK, NOTIFY_BAD)
+ */
+static int __cpuinit hotcpu_callback(struct notifier_block *nb,
+ unsigned long action,
+ void *hcpu)
+{
+ unsigned int hotcpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
+ spin_lock(&synthetic_tsc_lock);
+ if (synthetic_tsc_refcount)
+ prepare_synthetic_tsc(hotcpu);
+ spin_unlock(&synthetic_tsc_lock);
+ break;
+ case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
+ spin_lock(&synthetic_tsc_lock);
+ if (synthetic_tsc_refcount)
+ enable_synthetic_tsc(hotcpu);
+ spin_unlock(&synthetic_tsc_lock);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
+ spin_lock(&synthetic_tsc_lock);
+ if (synthetic_tsc_refcount)
+ disable_synthetic_tsc(hotcpu);
+ spin_unlock(&synthetic_tsc_lock);
+ break;
+#endif /* CONFIG_HOTPLUG_CPU */
+ }
+ return NOTIFY_OK;
+}
+
+void get_synthetic_tsc(void)
+{
+ int cpu;
+
+ spin_lock(&synthetic_tsc_lock);
+ if (synthetic_tsc_refcount++)
+ goto end;
+
+ synthetic_tsc_enabled = 1;
+ for_each_online_cpu(cpu) {
+ prepare_synthetic_tsc(cpu);
+ enable_synthetic_tsc(cpu);
+ }
+end:
+ spin_unlock(&synthetic_tsc_lock);
+}
+EXPORT_SYMBOL_GPL(get_synthetic_tsc);
+
+void put_synthetic_tsc(void)
+{
+ int cpu;
+
+ spin_lock(&synthetic_tsc_lock);
+ WARN_ON(synthetic_tsc_refcount <= 0);
+ if (synthetic_tsc_refcount != 1 || !synthetic_tsc_enabled)
+ goto end;
+
+ for_each_online_cpu(cpu)
+ disable_synthetic_tsc(cpu);
+ synthetic_tsc_enabled = 0;
+end:
+ synthetic_tsc_refcount--;
+ spin_unlock(&synthetic_tsc_lock);
+}
+EXPORT_SYMBOL_GPL(put_synthetic_tsc);
+
+/* Called from CPU 0, before any tracing starts, to init each structure */
+static int __init init_synthetic_tsc(void)
+{
+ precalc_stsc_interval();
+ hotcpu_notifier(hotcpu_callback, 3);
+ return 0;
+}
+
+/* Before SMP is up */
+early_initcall(init_synthetic_tsc);
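
The heart of this file is the two-slot scheme shared by
update_synthetic_tsc() and trace_clock_read_synthetic_tsc(): the updater
always writes the non-current slot and only then flips the index, so a
reader -- even an NMI landing mid-update -- sees a consistent snapshot and
merely has to compensate for a wraparound the updater has not recorded yet.
A stand-alone model of the read side, assuming TC_HW_BITS == 32 for the
sketch (the kernel code is width-independent):

#include <stdint.h>

#define HW_BITS 32
#define HW_MASK ((1ULL << HW_BITS) - 1)

struct synth {
	uint64_t tsc[2];	/* double-buffered extended counts */
	unsigned int idx;	/* which slot is current */
};

static uint64_t synth_read(const struct synth *s, uint32_t hw_now)
{
	uint64_t snap = s->tsc[s->idx];

	if (hw_now < (snap & HW_MASK))	/* hardware counter wrapped */
		return ((snap & ~HW_MASK) | hw_now) + (1ULL << HW_BITS);
	return (snap & ~HW_MASK) | hw_now;
}
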
diff --git a/kernel/trace/trace-clock.c b/kernel/trace/trace-clock.c
new file mode 100644
index 00000000000..3ed1667aacb
--- /dev/null
+++ b/kernel/trace/trace-clock.c
@@ -0,0 +1,97 @@
+/*
+ * kernel/trace/trace-clock.c
+ *
+ * (C) Copyright 2008 -
+ * Mathieu Desnoyers (mathieu.desnoyers@polymtl.ca)
+ *
+ * Generic kernel tracing clock for architectures without TSC.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <linux/cpu.h>
+#include <linux/timex.h>
+#include <linux/bitops.h>
+#include <linux/trace-clock.h>
+#include <linux/jiffies.h>
+
+static int trace_clock_refcount;
+static DEFINE_MUTEX(trace_clock_mutex);
+static struct timer_list trace_clock_timer;
+/*
+ * bits 0..12 : counter, atomically incremented
+ * bits 13..{32,64} : time counter, incremented each jiffy.
+ */
+atomic_long_t trace_clock_var;
+EXPORT_SYMBOL(trace_clock_var);
+
+static void trace_clock_update(void)
+{
+ long old_clock, new_clock;
+ unsigned long ticks;
+
+ /*
+ * Make sure we account for the timer having been delayed.
+ */
+ ticks = jiffies - trace_clock_timer.expires + 1;
+ /* Don't update if ticks is zero, time would go backward. */
+ if (unlikely(!ticks))
+ return;
+ do {
+ old_clock = atomic_long_read(&trace_clock_var);
+ new_clock = (old_clock + (ticks << TRACE_CLOCK_SHIFT))
+ & (~((1 << TRACE_CLOCK_SHIFT) - 1));
+ } while (atomic_long_cmpxchg(&trace_clock_var, old_clock, new_clock)
+ != old_clock);
+}
+
+static void trace_clock_timer_fct(unsigned long data)
+{
+ trace_clock_update();
+ trace_clock_timer.expires = jiffies + 1;
+ add_timer(&trace_clock_timer);
+}
+
+static void enable_trace_clock(void)
+{
+ init_timer(&trace_clock_timer);
+ /* trace_clock_update() reads expires */
+ trace_clock_timer.function = trace_clock_timer_fct;
+ trace_clock_timer.expires = jiffies + 1;
+ trace_clock_update();
+ add_timer(&trace_clock_timer);
+}
+
+static void disable_trace_clock(void)
+{
+ del_timer_sync(&trace_clock_timer);
+}
+
+void get_trace_clock(void)
+{
+ get_synthetic_tsc();
+ mutex_lock(&trace_clock_mutex);
+ if (trace_clock_refcount++)
+ goto end;
+ enable_trace_clock();
+end:
+ mutex_unlock(&trace_clock_mutex);
+}
+EXPORT_SYMBOL_GPL(get_trace_clock);
+
+void put_trace_clock(void)
+{
+ mutex_lock(&trace_clock_mutex);
+ WARN_ON(trace_clock_refcount <= 0);
+ if (trace_clock_refcount != 1)
+ goto end;
+ disable_trace_clock();
+end:
+ trace_clock_refcount--;
+ mutex_unlock(&trace_clock_mutex);
+ put_synthetic_tsc();
+}
+EXPORT_SYMBOL_GPL(put_trace_clock);
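
This file only implements the update side; per the layout comment above
trace_clock_var, a reader bumps the low-order counter bits on every read so
two events within the same jiffy still get distinct, monotonically increasing
timestamps, and trace_clock_update() masks those bits back to zero each tick.
A hedged sketch of such a reader -- the real helper presumably lives in the
trace-clock header, not in this file:

static inline unsigned long my_trace_clock_read(void)
{
	/* Atomic increment: unique timestamp even between timer ticks. */
	return (unsigned long)atomic_long_add_return(1, &trace_clock_var);
}
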
diff --git a/kernel/trace/trace_printk.c b/kernel/trace/trace_printk.c
index 2547d8813cf..687699d365a 100644
--- a/kernel/trace/trace_printk.c
+++ b/kernel/trace/trace_printk.c
@@ -11,6 +11,7 @@
#include <linux/ftrace.h>
#include <linux/string.h>
#include <linux/module.h>
+#include <linux/marker.h>
#include <linux/mutex.h>
#include <linux/ctype.h>
#include <linux/list.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index 83a45d35468..74bb9f8acf3 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -34,6 +34,7 @@
#include <linux/hardirq.h> /* for BUG_ON(!in_atomic()) only */
#include <linux/memcontrol.h>
#include <linux/mm_inline.h> /* for page_is_file_cache() */
+#include <trace/filemap.h>
#include "internal.h"
/*
@@ -43,6 +44,11 @@
#include <asm/mman.h>
+DEFINE_TRACE(wait_on_page_start);
+DEFINE_TRACE(wait_on_page_end);
+DEFINE_TRACE(add_to_page_cache);
+DEFINE_TRACE(remove_from_page_cache);
+
/*
* Shared mappings implemented 30.11.1994. It's not fully working yet,
* though.
@@ -120,6 +126,7 @@ void __remove_from_page_cache(struct page *page)
page->mapping = NULL;
mapping->nrpages--;
__dec_zone_page_state(page, NR_FILE_PAGES);
+ trace_remove_from_page_cache(mapping);
if (PageSwapBacked(page))
__dec_zone_page_state(page, NR_SHMEM);
BUG_ON(page_mapped(page));
@@ -419,6 +426,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
if (likely(!error)) {
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
+ trace_add_to_page_cache(mapping, offset);
if (PageSwapBacked(page))
__inc_zone_page_state(page, NR_SHMEM);
spin_unlock_irq(&mapping->tree_lock);
@@ -511,9 +519,11 @@ void wait_on_page_bit(struct page *page, int bit_nr)
{
DEFINE_WAIT_BIT(wait, &page->flags, bit_nr);
+ trace_wait_on_page_start(page, bit_nr);
if (test_bit(bit_nr, &page->flags))
__wait_on_bit(page_waitqueue(page), &wait, sync_page,
TASK_UNINTERRUPTIBLE);
+ trace_wait_on_page_end(page, bit_nr);
}
EXPORT_SYMBOL(wait_on_page_bit);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bb0b7c12801..2114fb2615e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -21,6 +21,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <trace/hugetlb.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -53,6 +54,14 @@ static unsigned long __initdata default_hstate_size;
*/
static DEFINE_SPINLOCK(hugetlb_lock);
+DEFINE_TRACE(hugetlb_page_release);
+DEFINE_TRACE(hugetlb_page_grab);
+DEFINE_TRACE(hugetlb_buddy_pgalloc);
+DEFINE_TRACE(hugetlb_page_alloc);
+DEFINE_TRACE(hugetlb_page_free);
+DEFINE_TRACE(hugetlb_pages_reserve);
+DEFINE_TRACE(hugetlb_pages_unreserve);
+
/*
* Region tracking -- allows tracking of reservations and instantiated pages
* across the pages in a mapping.
@@ -500,6 +509,7 @@ static void update_and_free_page(struct hstate *h, struct page *page)
VM_BUG_ON(h->order >= MAX_ORDER);
+ trace_hugetlb_page_release(page);
h->nr_huge_pages--;
h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h); i++) {
@@ -534,6 +544,7 @@ static void free_huge_page(struct page *page)
int nid = page_to_nid(page);
struct address_space *mapping;
+ trace_hugetlb_page_free(page);
mapping = (struct address_space *) page_private(page);
set_page_private(page, 0);
page->mapping = NULL;
@@ -598,8 +609,10 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
{
struct page *page;
- if (h->order >= MAX_ORDER)
- return NULL;
+ if (h->order >= MAX_ORDER) {
+ page = NULL;
+ goto end;
+ }
page = alloc_pages_exact_node(nid,
htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
@@ -608,11 +621,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
if (page) {
if (arch_prepare_hugepage(page)) {
__free_pages(page, huge_page_order(h));
- return NULL;
+ page = NULL;
+ goto end;
}
prep_new_huge_page(h, page, nid);
}
-
+end:
+ trace_hugetlb_page_grab(page);
return page;
}
@@ -781,7 +796,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
spin_lock(&hugetlb_lock);
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
spin_unlock(&hugetlb_lock);
- return NULL;
+ page = NULL;
+ goto end;
} else {
h->nr_huge_pages++;
h->surplus_huge_pages++;
@@ -818,7 +834,8 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
}
spin_unlock(&hugetlb_lock);
-
+end:
+ trace_hugetlb_buddy_pgalloc(page);
return page;
}
@@ -1054,6 +1071,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
vma_commit_reservation(h, vma, addr);
+ trace_hugetlb_page_alloc(page);
return page;
}
@@ -2837,7 +2855,8 @@ int hugetlb_reserve_pages(struct inode *inode,
struct vm_area_struct *vma,
int acctflag)
{
- long ret, chg;
+ int ret = 0;
+ long chg;
struct hstate *h = hstate_inode(inode);
/*
@@ -2846,7 +2865,7 @@ int hugetlb_reserve_pages(struct inode *inode,
* and filesystem quota without using reserves
*/
if (acctflag & VM_NORESERVE)
- return 0;
+ goto end;
/*
* Shared mappings base their reservation on the number of pages that
@@ -2858,8 +2877,10 @@ int hugetlb_reserve_pages(struct inode *inode,
chg = region_chg(&inode->i_mapping->private_list, from, to);
else {
struct resv_map *resv_map = resv_map_alloc();
- if (!resv_map)
- return -ENOMEM;
+ if (!resv_map) {
+ ret = -ENOMEM;
+ goto end;
+ }
chg = to - from;
@@ -2867,12 +2888,16 @@ int hugetlb_reserve_pages(struct inode *inode,
set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
}
- if (chg < 0)
- return chg;
+ if (chg < 0) {
+ ret = chg;
+ goto end;
+ }
/* There must be enough filesystem quota for the mapping */
- if (hugetlb_get_quota(inode->i_mapping, chg))
- return -ENOSPC;
+ if (hugetlb_get_quota(inode->i_mapping, chg)) {
+ ret = -ENOSPC;
+ goto end;
+ }
/*
* Check enough hugepages are available for the reservation.
@@ -2881,7 +2906,7 @@ int hugetlb_reserve_pages(struct inode *inode,
ret = hugetlb_acct_memory(h, chg);
if (ret < 0) {
hugetlb_put_quota(inode->i_mapping, chg);
- return ret;
+ goto end;
}
/*
@@ -2897,14 +2922,18 @@ int hugetlb_reserve_pages(struct inode *inode,
*/
if (!vma || vma->vm_flags & VM_MAYSHARE)
region_add(&inode->i_mapping->private_list, from, to);
- return 0;
+end:
+ trace_hugetlb_pages_reserve(inode, from, to, ret);
+ return ret;
}
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
{
struct hstate *h = hstate_inode(inode);
- long chg = region_truncate(&inode->i_mapping->private_list, offset);
+ long chg;
+ trace_hugetlb_pages_unreserve(inode, offset, freed);
+ chg = region_truncate(&inode->i_mapping->private_list, offset);
spin_lock(&inode->i_lock);
inode->i_blocks -= (blocks_per_huge_page(h) * freed);
spin_unlock(&inode->i_lock);
diff --git a/mm/memory.c b/mm/memory.c
index 5823698c2b7..7c4cb4e8515 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -57,6 +57,8 @@
#include <linux/swapops.h>
#include <linux/elf.h>
#include <linux/gfp.h>
+#include <trace/swap.h>
+#include <trace/fault.h>
#include <asm/io.h>
#include <asm/pgalloc.h>
@@ -67,6 +69,10 @@
#include "internal.h"
+DEFINE_TRACE(swap_in);
+DEFINE_TRACE(page_fault_get_user_entry);
+DEFINE_TRACE(page_fault_get_user_exit);
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
/* use the per-pgdat data instead for discontigmem - mbligh */
unsigned long max_mapnr;
@@ -2747,6 +2753,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
+ trace_swap_in(page, entry);
} else if (PageHWPoison(page)) {
/*
* hwpoisoned dirty swapcache pages are kept for killing
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cdef1d4b4e4..3ea857c5e28 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,6 +53,7 @@
#include <linux/compaction.h>
#include <trace/events/kmem.h>
#include <linux/ftrace_event.h>
+#include <trace/page_alloc.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
@@ -129,6 +130,9 @@ void pm_restrict_gfp_mask(void)
int pageblock_order __read_mostly;
#endif
+DEFINE_TRACE(page_alloc);
+DEFINE_TRACE(page_free);
+
static void __free_pages_ok(struct page *page, unsigned int order);
/*
@@ -2165,6 +2169,7 @@ nopage:
}
return page;
got_pg:
+ trace_page_alloc(page, order);
if (kmemcheck_enabled)
kmemcheck_pagealloc_alloc(page, order, gfp_mask);
return page;
diff --git a/mm/page_io.c b/mm/page_io.c
index 2dee975bf46..d262ffb0c2d 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -18,8 +18,11 @@
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
+#include <trace/swap.h>
#include <asm/pgtable.h>
+DEFINE_TRACE(swap_out);
+
static struct bio *get_swap_bio(gfp_t gfp_flags,
struct page *page, bio_end_io_t end_io)
{
@@ -109,6 +112,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
rw |= REQ_SYNC | REQ_UNPLUG;
count_vm_event(PSWPOUT);
set_page_writeback(page);
+ trace_swap_out(page);
unlock_page(page);
submit_bio(rw, bio);
out:
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0341c5700e3..41a00cead75 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,12 +31,16 @@
#include <linux/syscalls.h>
#include <linux/memcontrol.h>
#include <linux/poll.h>
+#include <trace/swap.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <linux/swapops.h>
#include <linux/page_cgroup.h>
+DEFINE_TRACE(swap_file_open);
+DEFINE_TRACE(swap_file_close);
+
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
@@ -1669,6 +1673,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
swap_map = p->swap_map;
p->swap_map = NULL;
p->flags = 0;
+ trace_swap_file_close(swap_file);
spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
vfree(swap_map);
@@ -2129,6 +2134,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
swap_list.head = swap_list.next = type;
else
swap_info[prev]->next = type;
+ trace_swap_file_open(swap_file, name);
spin_unlock(&swap_lock);
mutex_unlock(&swapon_mutex);
atomic_inc(&proc_poll_event);
@@ -2280,6 +2286,13 @@ int swap_duplicate(swp_entry_t entry)
return err;
}
+struct swap_info_struct *
+get_swap_info_struct(unsigned type)
+{
+ return swap_info[type];
+}
+EXPORT_SYMBOL_GPL(get_swap_info_struct);
+
/*
* @entry: swap entry for which we allocate swap cache.
*
@@ -2560,3 +2573,22 @@ static void free_swap_count_continuations(struct swap_info_struct *si)
}
}
}
+
+void ltt_dump_swap_files(void *call_data)
+{
+ int type;
+ struct swap_info_struct *p = NULL;
+
+ mutex_lock(&swapon_mutex);
+ for (type = swap_list.head; type >= 0; type = swap_info[type]->next) {
+ p = swap_info[type];
+ if (!(p->flags & SWP_WRITEOK))
+ continue;
+ __trace_mark(0, swap_state, statedump_swap_files, call_data,
+ "filp %p vfsmount %p dname %s",
+ p->swap_file, p->swap_file->f_vfsmnt,
+ p->swap_file->f_dentry->d_name.name);
+ }
+ mutex_unlock(&swapon_mutex);
+}
+EXPORT_SYMBOL_GPL(ltt_dump_swap_files);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f9b166732e7..f2df4585ae2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2059,7 +2059,7 @@ EXPORT_SYMBOL(remap_vmalloc_range);
void __attribute__((weak)) vmalloc_sync_all(void)
{
}
-
+EXPORT_SYMBOL_GPL(vmalloc_sync_all);
static int f(pte_t *pte, pgtable_t table, unsigned long addr, void *data)
{
diff --git a/net/core/dev.c b/net/core/dev.c
index 8ae6631abcc..4dd6af4b373 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -132,6 +132,7 @@
#include <trace/events/skb.h>
#include <linux/pci.h>
#include <linux/inetdevice.h>
+#include <trace/net.h>
#include "net-sysfs.h"
@@ -198,6 +199,13 @@ static struct list_head ptype_all __read_mostly; /* Taps */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);
+DEFINE_TRACE(lttng_net_dev_xmit);
+DEFINE_TRACE(lttng_net_dev_receive);
+DEFINE_TRACE(net_napi_schedule);
+DEFINE_TRACE(net_napi_poll);
+DEFINE_TRACE(net_napi_complete);
+EXPORT_TRACEPOINT_SYMBOL_GPL(net_napi_complete);
+
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
@@ -2111,6 +2119,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ trace_lttng_net_dev_xmit(skb);
rc = ops->ndo_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc);
if (rc == NETDEV_TX_OK)
@@ -2132,6 +2141,7 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ trace_lttng_net_dev_xmit(nskb);
rc = ops->ndo_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc);
if (unlikely(rc != NETDEV_TX_OK)) {
@@ -2733,6 +2743,8 @@ int netif_rx(struct sk_buff *skb)
if (netpoll_rx(skb))
return NET_RX_DROP;
+ trace_lttng_net_dev_receive(skb);
+
if (netdev_tstamp_prequeue)
net_timestamp_check(skb);
@@ -3166,6 +3178,8 @@ int netif_receive_skb(struct sk_buff *skb)
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
+ trace_lttng_net_dev_receive(skb);
+
#ifdef CONFIG_RPS
{
struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -3617,6 +3631,8 @@ void __napi_schedule(struct napi_struct *n)
{
unsigned long flags;
+ trace_net_napi_schedule(n);
+
local_irq_save(flags);
____napi_schedule(&__get_cpu_var(softnet_data), n);
local_irq_restore(flags);
@@ -3631,6 +3647,7 @@ void __napi_complete(struct napi_struct *n)
list_del(&n->poll_list);
smp_mb__before_clear_bit();
clear_bit(NAPI_STATE_SCHED, &n->state);
+ trace_net_napi_complete(n);
}
EXPORT_SYMBOL(__napi_complete);
@@ -3730,6 +3747,7 @@ static void net_rx_action(struct softirq_action *h)
*/
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ trace_net_napi_poll(n);
work = n->poll(n, weight);
trace_napi_poll(n);
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index df4616fce92..a3af729a5e5 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -62,6 +62,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#include <trace/ipv4.h>
static struct ipv4_devconf ipv4_devconf = {
.data = {
@@ -92,6 +93,9 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
};
+DEFINE_TRACE(ipv4_addr_add);
+DEFINE_TRACE(ipv4_addr_del);
+
static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
@@ -252,6 +256,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
struct in_ifaddr **ifap1 = &ifa1->ifa_next;
while ((ifa = *ifap1) != NULL) {
+ trace_ipv4_addr_del(ifa);
if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
ifa1->ifa_scope <= ifa->ifa_scope)
last_prim = ifa;
@@ -358,6 +363,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
}
ifa->ifa_flags |= IFA_F_SECONDARY;
}
+ trace_ipv4_addr_add(ifa);
}
if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 02f583b3744..61f72b24e7e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -81,6 +81,9 @@
#include <linux/crypto.h>
#include <linux/scatterlist.h>
+#include <trace/net.h>
+
+DEFINE_TRACE(net_tcpv4_rcv);
int sysctl_tcp_tw_reuse __read_mostly;
int sysctl_tcp_low_latency __read_mostly;
@@ -1543,6 +1546,9 @@ static __sum16 tcp_v4_checksum_init(struct sk_buff *skb)
int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct sock *rsk;
+
+ trace_net_tcpv4_rcv(skb);
+
#ifdef CONFIG_TCP_MD5SIG
/*
* We really want to reject the packet as early as possible
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8157b17959e..21f5aac48d3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -105,6 +105,7 @@
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <trace/net.h>
#include "udp_impl.h"
struct udp_table udp_table __read_mostly;
@@ -125,6 +126,8 @@ EXPORT_SYMBOL(udp_memory_allocated);
#define MAX_UDP_PORTS 65536
#define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN)
+DEFINE_TRACE(net_udpv4_rcv);
+
static int udp_lib_lport_inuse(struct net *net, __u16 num,
const struct udp_hslot *hslot,
unsigned long *bitmap,
@@ -1596,6 +1599,8 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
+ trace_net_udpv4_rcv(skb);
+
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(net, skb, uh,
saddr, daddr, udptable);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd6782e3a03..62c96253c36 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -87,6 +87,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <trace/ipv6.h>
/* Set to 3 to get tracing... */
#define ACONF_DEBUG 2
@@ -108,6 +109,9 @@ static inline u32 cstamp_delta(unsigned long cstamp)
#define ADDRCONF_TIMER_FUZZ (HZ / 4)
#define ADDRCONF_TIMER_FUZZ_MAX (HZ)
+DEFINE_TRACE(ipv6_addr_add);
+DEFINE_TRACE(ipv6_addr_del);
+
#ifdef CONFIG_SYSCTL
static void addrconf_sysctl_register(struct inet6_dev *idev);
static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -676,6 +680,8 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
/* For caller */
in6_ifa_hold(ifa);
+ trace_ipv6_addr_add(ifa);
+
/* Add to big hash table */
hash = ipv6_addr_hash(addr);
@@ -2212,6 +2218,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
in6_ifa_hold(ifp);
read_unlock_bh(&idev->lock);
+ trace_ipv6_addr_del(ifp);
ipv6_del_addr(ifp);
/* If the last address is deleted administratively,
diff --git a/net/socket.c b/net/socket.c
index ac2219f90d5..332c43007d1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -98,6 +98,7 @@
#include <net/sock.h>
#include <linux/netfilter.h>
+#include <trace/socket.h>
#include <linux/if_tun.h>
#include <linux/ipv6_route.h>
@@ -164,6 +165,21 @@ static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
static DEFINE_PER_CPU(int, sockets_in_use);
+DEFINE_TRACE(socket_create);
+DEFINE_TRACE(socket_bind);
+DEFINE_TRACE(socket_connect);
+DEFINE_TRACE(socket_listen);
+DEFINE_TRACE(socket_accept);
+DEFINE_TRACE(socket_getsockname);
+DEFINE_TRACE(socket_getpeername);
+DEFINE_TRACE(socket_socketpair);
+DEFINE_TRACE(socket_sendmsg);
+DEFINE_TRACE(socket_recvmsg);
+DEFINE_TRACE(socket_setsockopt);
+DEFINE_TRACE(socket_getsockopt);
+DEFINE_TRACE(socket_shutdown);
+DEFINE_TRACE(socket_call);
+
/*
* Support routines.
* Move socket addresses back and forth across the kernel/user
@@ -564,7 +580,9 @@ static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
if (err)
return err;
- return sock->ops->sendmsg(iocb, sock, msg, size);
+ err = sock->ops->sendmsg(iocb, sock, msg, size);
+ trace_socket_sendmsg(sock, msg, size, err);
+ return err;
}
int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -682,6 +700,7 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock_iocb *si = kiocb_to_siocb(iocb);
+ int err;
sock_update_classid(sock->sk);
@@ -691,7 +710,9 @@ static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
si->size = size;
si->flags = flags;
- return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ err = sock->ops->recvmsg(iocb, sock, msg, size, flags);
+ trace_socket_recvmsg(sock, msg, size, flags, err);
+ return err;
}
static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -1299,8 +1320,10 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
BUILD_BUG_ON(SOCK_NONBLOCK & SOCK_TYPE_MASK);
flags = type & ~SOCK_TYPE_MASK;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) {
+ retval = -EINVAL;
+ goto out;
+ }
type &= SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
@@ -1314,12 +1337,12 @@ SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol)
if (retval < 0)
goto out_release;
-out:
- /* It may be already another descriptor 8) Not kernel problem. */
- return retval;
-
+ goto out;
out_release:
sock_release(sock);
+out:
+ trace_socket_create(family, type, protocol, sock, retval);
+ /* It may be already another descriptor 8) Not kernel problem. */
return retval;
}
@@ -1336,8 +1359,10 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
int flags;
flags = type & ~SOCK_TYPE_MASK;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) {
+ err = -EINVAL;
+ goto out;
+ }
type &= SOCK_TYPE_MASK;
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
@@ -1386,17 +1411,18 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol,
if (!err)
err = put_user(fd2, &usockvec[1]);
if (!err)
- return 0;
+ goto out;
sys_close(fd2);
sys_close(fd1);
- return err;
+ goto out;
out_release_both:
sock_release(sock2);
out_release_1:
sock_release(sock1);
out:
+ trace_socket_socketpair(family, type, protocol, usockvec, err);
return err;
}
@@ -1428,6 +1454,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen)
}
fput_light(sock->file, fput_needed);
}
+ trace_socket_bind(fd, umyaddr, addrlen, err);
return err;
}
@@ -1455,6 +1482,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog)
fput_light(sock->file, fput_needed);
}
+ trace_socket_listen(fd, backlog, err);
return err;
}
@@ -1478,8 +1506,10 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
int err, len, newfd, fput_needed;
struct sockaddr_storage address;
- if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
- return -EINVAL;
+ if (flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) {
+ err = -EINVAL;
+ goto out;
+ }
if (SOCK_NONBLOCK != O_NONBLOCK && (flags & SOCK_NONBLOCK))
flags = (flags & ~SOCK_NONBLOCK) | O_NONBLOCK;
@@ -1537,6 +1567,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
out_put:
fput_light(sock->file, fput_needed);
out:
+ trace_socket_accept(fd, upeer_sockaddr, upeer_addrlen, flags, err);
return err;
out_fd:
fput(newfile);
@@ -1586,6 +1617,7 @@ SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr,
out_put:
fput_light(sock->file, fput_needed);
out:
+ trace_socket_connect(fd, uservaddr, addrlen, err);
return err;
}
@@ -1617,6 +1649,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr,
out_put:
fput_light(sock->file, fput_needed);
out:
+ trace_socket_getsockname(fd, usockaddr, usockaddr_len, err);
return err;
}
@@ -1637,7 +1670,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
err = security_socket_getpeername(sock);
if (err) {
fput_light(sock->file, fput_needed);
- return err;
+ goto out;
}
err =
@@ -1648,6 +1681,8 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
usockaddr_len);
fput_light(sock->file, fput_needed);
}
+out:
+ trace_socket_getpeername(fd, usockaddr, usockaddr_len, err);
return err;
}
@@ -1778,8 +1813,10 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
int err, fput_needed;
struct socket *sock;
- if (optlen < 0)
- return -EINVAL;
+ if (optlen < 0) {
+ err = -EINVAL;
+ goto out;
+ }
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (sock != NULL) {
@@ -1798,6 +1835,8 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname,
out_put:
fput_light(sock->file, fput_needed);
}
+out:
+ trace_socket_setsockopt(fd, level, optname, optval, optlen, err);
return err;
}
@@ -1829,6 +1868,7 @@ SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
out_put:
fput_light(sock->file, fput_needed);
}
+ trace_socket_getsockopt(fd, level, optname, optval, optlen, err);
return err;
}
@@ -1848,6 +1888,7 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how)
err = sock->ops->shutdown(sock, how);
fput_light(sock->file, fput_needed);
}
+ trace_socket_shutdown(fd, how, err);
return err;
}
@@ -2249,6 +2290,8 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args)
a0 = a[0];
a1 = a[1];
+ trace_socket_call(call, a0);
+
switch (call) {
case SYS_SOCKET:
err = sys_socket(a0, a1, a[2]);
diff --git a/samples/Kconfig b/samples/Kconfig
index e03cf0e374d..7b1eadec88a 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -5,6 +5,12 @@ menuconfig SAMPLES
if SAMPLES
+config SAMPLE_MARKERS
+ tristate "Build markers examples -- loadable modules only"
+ depends on MARKERS && m
+ help
+ This builds the markers example modules.
+
config SAMPLE_TRACEPOINTS
tristate "Build tracepoints examples -- loadable modules only"
depends on TRACEPOINTS && m
diff --git a/samples/Makefile b/samples/Makefile
index f26c0959fd8..ab354640197 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,4 +1,4 @@
# Makefile for Linux samples code
obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
- hw_breakpoint/ kfifo/ kdb/
+ hw_breakpoint/ kfifo/ kdb/ markers/
diff --git a/samples/markers/Makefile b/samples/markers/Makefile
new file mode 100644
index 00000000000..2244152159d
--- /dev/null
+++ b/samples/markers/Makefile
@@ -0,0 +1,4 @@
+# builds the markers example kernel modules;
+# then to use one (as root): insmod <module_name.ko>
+
+obj-$(CONFIG_SAMPLE_MARKERS) += probe-example.o marker-example.o test-multi.o
diff --git a/samples/markers/marker-example.c b/samples/markers/marker-example.c
new file mode 100644
index 00000000000..06afde47631
--- /dev/null
+++ b/samples/markers/marker-example.c
@@ -0,0 +1,53 @@
+/* marker-example.c
+ *
+ * Executes a marker when /proc/marker-example is opened.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/sched.h>
+#include <linux/proc_fs.h>
+
+struct proc_dir_entry *pentry_example;
+
+static int my_open(struct inode *inode, struct file *file)
+{
+ int i;
+
+ trace_mark(samples, subsystem_event, "integer %d string %s", 123,
+ "example string");
+ for (i = 0; i < 10; i++)
+ trace_mark(samples, subsystem_eventb, MARK_NOARGS);
+ return -EPERM;
+}
+
+static struct file_operations mark_ops = {
+ .open = my_open,
+};
+
+static int __init example_init(void)
+{
+ printk(KERN_ALERT "example init\n");
+ pentry_example = proc_create("marker-example", 0444, NULL, &mark_ops);
+ if (!pentry_example)
+ return -EPERM;
+ return 0;
+}
+
+static void __exit example_exit(void)
+{
+ printk(KERN_ALERT "example exit\n");
+ remove_proc_entry("marker-example", NULL);
+}
+
+module_init(example_init)
+module_exit(example_exit)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("Marker example");
diff --git a/samples/markers/probe-example.c b/samples/markers/probe-example.c
new file mode 100644
index 00000000000..2c449c0c1eb
--- /dev/null
+++ b/samples/markers/probe-example.c
@@ -0,0 +1,94 @@
+/* probe-example.c
+ *
+ * Connects two functions to marker call sites.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+static void probe_subsystem_event(const struct marker *mdata,
+ void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ /* Declare args */
+ unsigned int value;
+ const char *mystr;
+
+ /* Assign args */
+ value = va_arg(*args, typeof(value));
+ mystr = va_arg(*args, typeof(mystr));
+
+ /* Call printk */
+ printk(KERN_INFO "Value %u, string %s\n", value, mystr);
+
+ /* or count, check rights, serialize data in a buffer */
+}
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+static void probe_subsystem_eventb(const struct marker *mdata,
+ void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ /* Increment counter */
+ atomic_inc(&eventb_count);
+}
+
+static struct probe_data probe_array[] =
+{
+ { .name = "subsystem_event",
+ .format = "integer %d string %s",
+ .probe_func = probe_subsystem_event },
+ { .name = "subsystem_eventb",
+ .format = MARK_NOARGS,
+ .probe_func = probe_subsystem_eventb },
+};
+
+static int __init probe_init(void)
+{
+ int result;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+ result = marker_probe_register("samples", probe_array[i].name,
+ probe_array[i].format,
+ probe_array[i].probe_func, &probe_array[i]);
+ if (result)
+ printk(KERN_INFO "Unable to register probe %s\n",
+ probe_array[i].name);
+ }
+ return 0;
+}
+
+static void __exit probe_fini(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(probe_array); i++)
+ marker_probe_unregister("samples", probe_array[i].name,
+ probe_array[i].probe_func, &probe_array[i]);
+ printk(KERN_INFO "Number of event b : %u\n",
+ atomic_read(&eventb_count));
+ marker_synchronize_unregister();
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/samples/markers/test-multi.c b/samples/markers/test-multi.c
new file mode 100644
index 00000000000..9f62d2921b8
--- /dev/null
+++ b/samples/markers/test-multi.c
@@ -0,0 +1,116 @@
+/* test-multi.c
+ *
+ * Connects multiple callbacks.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <asm/atomic.h>
+
+struct probe_data {
+ const char *name;
+ const char *format;
+ marker_probe_func *probe_func;
+};
+
+atomic_t eventb_count = ATOMIC_INIT(0);
+
+void probe_subsystem_eventa(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ /* Increment counter */
+ atomic_inc(&eventb_count);
+}
+
+void probe_subsystem_eventb(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ /* Increment counter */
+ atomic_inc(&eventb_count);
+}
+
+void probe_subsystem_eventc(const struct marker *mdata,
+		void *probe_data, void *call_data,
+		const char *format, va_list *args)
+{
+ /* Increment counter */
+ atomic_inc(&eventb_count);
+}
+
+void probe_subsystem_eventd(const struct marker *mdata,
+		void *probe_data, void *call_data,
+		const char *format, va_list *args)
+{
+ /* Increment counter */
+ atomic_inc(&eventb_count);
+}
+
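+/*
+ * Dummy, non-callable probe addresses registered under a single marker
+ * name, presumably to exercise the multi-probe add/remove bookkeeping;
+ * the "test_multi" marker must therefore never be armed and fired.
+ */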
+static struct probe_data probe_array[] =
+{
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0xa },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0xb },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0xc },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0xd },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0x10 },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0x20 },
+ { .name = "test_multi",
+ .format = MARK_NOARGS,
+ .probe_func = (marker_probe_func*)0x30 },
+};
+
+static int __init probe_init(void)
+{
+ int result;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+ result = marker_probe_register("samples", probe_array[i].name,
+ probe_array[i].format,
+ probe_array[i].probe_func, (void*)(long)i);
+ if (result)
+ printk(KERN_INFO "Unable to register probe %s\n",
+ probe_array[i].name);
+ }
+ return 0;
+}
+
+static void __exit probe_fini(void)
+{
+ int result;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(probe_array); i++) {
+ result = marker_probe_unregister("samples", probe_array[i].name,
+ probe_array[i].probe_func, (void*)(long)i);
+ if (result)
+ printk(KERN_INFO "Unable to unregister probe %s\n",
+ probe_array[i].name);
+ }
+ printk(KERN_INFO "Number of event b : %u\n",
+ atomic_read(&eventb_count));
+ marker_synchronize_unregister();
+}
+
+module_init(probe_init);
+module_exit(probe_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Mathieu Desnoyers");
+MODULE_DESCRIPTION("SUBSYSTEM Probe");
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 7d22056582c..e0d2e063d91 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -13,6 +13,7 @@
# 2) modpost is then used to
# 3) create one <module>.mod.c file per module
# 4) create one Module.symvers file with CRC for all exported symbols
+# 4a) [CONFIG_MARKERS] create one Module.markers file listing defined markers
# 5) compile all <module>.mod.c files
# 6) final link of the module to a <module.ko> file
@@ -58,6 +59,10 @@ include scripts/Makefile.lib
kernelsymfile := $(objtree)/Module.symvers
modulesymfile := $(firstword $(KBUILD_EXTMOD))/Module.symvers
+kernelmarkersfile := $(objtree)/Module.markers
+modulemarkersfile := $(firstword $(KBUILD_EXTMOD))/Module.markers
+
+markersfile = $(if $(KBUILD_EXTMOD),$(modulemarkersfile),$(kernelmarkersfile))
# Step 1), find all modules listed in $(MODVERDIR)/
__modules := $(sort $(shell grep -h '\.ko' /dev/null $(wildcard $(MODVERDIR)/*.mod)))
@@ -80,6 +85,8 @@ modpost = scripts/mod/modpost \
$(if $(KBUILD_EXTRA_SYMBOLS), $(patsubst %, -e %,$(KBUILD_EXTRA_SYMBOLS))) \
$(if $(KBUILD_EXTMOD),-o $(modulesymfile)) \
$(if $(CONFIG_DEBUG_SECTION_MISMATCH),,-S) \
+ $(if $(CONFIG_MARKERS),-K $(kernelmarkersfile)) \
+ $(if $(CONFIG_MARKERS),-M $(markersfile)) \
$(if $(KBUILD_EXTMOD)$(KBUILD_MODPOST_WARN),-w) \
$(if $(cross_build),-c)
@@ -94,12 +101,17 @@ quiet_cmd_kernel-mod = MODPOST $@
cmd_kernel-mod = $(modpost) $@
vmlinux.o: FORCE
+ @rm -fr $(kernelmarkersfile)
$(call cmd,kernel-mod)
# Declare generated files as targets for modpost
$(symverfile): __modpost ;
$(modules:.ko=.mod.c): __modpost ;
+ifdef CONFIG_MARKERS
+$(markersfile): __modpost ;
+endif
+
# Step 5), compile all *.mod.c files
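
With CONFIG_MARKERS=y the modpost command line thus grows two options: -K names
the kernel's Module.markers to read back in, and -M the markers file to write
(the external module's own Module.markers for KBUILD_EXTMOD builds).
Schematically, for an in-tree build both point at the same file:

	scripts/mod/modpost ... \
		-K $(objtree)/Module.markers \
		-M $(objtree)/Module.markers ...
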
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index e8fba959fff..097d58047fe 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -481,6 +481,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
info->export_unused_gpl_sec = i;
else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
info->export_gpl_future_sec = i;
+ else if (strcmp(secname, "__markers_strings") == 0)
+ info->markers_strings_sec = i;
if (sechdrs[i].sh_type == SHT_SYMTAB) {
unsigned int sh_link_idx;
@@ -798,6 +800,7 @@ static const char *section_white_list[] =
".note*",
".got*",
".toc*",
+ "__discard",
NULL
};
@@ -1640,6 +1643,62 @@ static void check_sec_ref(struct module *mod, const char *modname,
}
}
+static void get_markers(struct elf_info *info, struct module *mod)
+{
+	const Elf_Shdr *sh;
+	const char *strings;
+	const Elf_Sym *sym, *first_sym, *last_sym;
+	size_t n;
+
+	if (!info->markers_strings_sec)
+		return;
+
+	sh = &info->sechdrs[info->markers_strings_sec];
+	strings = (const char *) info->hdr + sh->sh_offset;
+
+ /*
+ * First count the strings. We look for all the symbols defined
+ * in the __markers_strings section named __mstrtab_*. For
+ * these local names, the compiler puts a random .NNN suffix on,
+ * so the names don't correspond exactly.
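+	 * For instance, a symbol "__mstrtab_subsystem_event.1234" (suffix
+	 * illustrative) whose data is "subsystem_event\0integer %d string %s".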
+ */
+ first_sym = last_sym = NULL;
+ n = 0;
+ for (sym = info->symtab_start; sym < info->symtab_stop; sym++)
+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+ sym->st_shndx == info->markers_strings_sec &&
+ !strncmp(info->strtab + sym->st_name,
+ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+ if (first_sym == NULL)
+ first_sym = sym;
+ last_sym = sym;
+ ++n;
+ }
+
+ if (n == 0)
+ return;
+
+ /*
+ * Now collect each name and format into a line for the output.
+ * Lines look like:
+ * marker_name vmlinux marker %s format %d
+ * The format string after the second \t can use whitespace.
+ */
+ mod->markers = NOFAIL(malloc(sizeof mod->markers[0] * n));
+ mod->nmarkers = n;
+
+ n = 0;
+ for (sym = first_sym; sym <= last_sym; sym++)
+ if (ELF_ST_TYPE(sym->st_info) == STT_OBJECT &&
+ sym->st_shndx == info->markers_strings_sec &&
+ !strncmp(info->strtab + sym->st_name,
+ "__mstrtab_", sizeof "__mstrtab_" - 1)) {
+ const char *name = strings + sym->st_value;
+ const char *fmt = strchr(name, '\0') + 1;
+ char *line = NULL;
+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+ NOFAIL(line);
+ mod->markers[n++] = line;
+ }
+}
+
static void read_symbols(char *modname)
{
const char *symname;
@@ -1695,6 +1754,8 @@ static void read_symbols(char *modname)
get_src_version(modname, mod->srcversion,
sizeof(mod->srcversion)-1);
+ get_markers(&info, mod);
+
parse_elf_finish(&info);
/* Our trick to get versioning for module struct etc. - it's
@@ -2049,6 +2110,96 @@ static void write_dump(const char *fname)
write_if_changed(&buf, fname);
}
+static void add_marker(struct module *mod, const char *name, const char *fmt)
+{
+ char *line = NULL;
+ asprintf(&line, "%s\t%s\t%s\n", name, mod->name, fmt);
+ NOFAIL(line);
+
+ mod->markers = NOFAIL(realloc(mod->markers, ((mod->nmarkers + 1) *
+ sizeof mod->markers[0])));
+ mod->markers[mod->nmarkers++] = line;
+}
+
+static void read_markers(const char *fname)
+{
+ unsigned long size, pos = 0;
+ void *file = grab_file(fname, &size);
+ char *line;
+
+ if (!file) /* No old markers, silently ignore */
+ return;
+
+ while ((line = get_next_line(&pos, file, size))) {
+ char *marker, *modname, *fmt;
+ struct module *mod;
+
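+		/* Each line is "marker\tmodule\tformat", as written by add_marker(). */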
+ marker = line;
+ modname = strchr(marker, '\t');
+ if (!modname)
+ goto fail;
+ *modname++ = '\0';
+ fmt = strchr(modname, '\t');
+ if (!fmt)
+ goto fail;
+ *fmt++ = '\0';
+ if (*marker == '\0' || *modname == '\0')
+ goto fail;
+
+ mod = find_module(modname);
+ if (!mod) {
+ mod = new_module(modname);
+ mod->skip = 1;
+ }
+ if (is_vmlinux(modname)) {
+ have_vmlinux = 1;
+ mod->skip = 0;
+ }
+
+ if (!mod->skip)
+ add_marker(mod, marker, fmt);
+ }
+ release_file(file, size);
+ return;
+fail:
+ fatal("parse error in markers list file\n");
+}
+
+static int compare_strings(const void *a, const void *b)
+{
+ return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void write_markers(const char *fname)
+{
+ struct buffer buf = { };
+ struct module *mod;
+ size_t i;
+
+ for (mod = modules; mod; mod = mod->next)
+ if ((!external_module || !mod->skip) && mod->markers != NULL) {
+ /*
+ * Sort the strings so we can skip duplicates when
+ * we write them out.
+ */
+ qsort(mod->markers, mod->nmarkers,
+ sizeof mod->markers[0], &compare_strings);
+ for (i = 0; i < mod->nmarkers; ++i) {
+ char *line = mod->markers[i];
+ buf_write(&buf, line, strlen(line));
+ while (i + 1 < mod->nmarkers &&
+ !strcmp(mod->markers[i],
+ mod->markers[i + 1]))
+ free(mod->markers[i++]);
+ free(mod->markers[i]);
+ }
+ free(mod->markers);
+ mod->markers = NULL;
+ }
+
+ write_if_changed(&buf, fname);
+}
+
struct ext_sym_list {
struct ext_sym_list *next;
const char *file;
@@ -2060,6 +2211,8 @@ int main(int argc, char **argv)
struct buffer buf = { };
char *kernel_read = NULL, *module_read = NULL;
char *dump_write = NULL;
+ char *markers_read = NULL;
+ char *markers_write = NULL;
int opt;
int err;
struct ext_sym_list *extsym_iter;
@@ -2103,6 +2256,12 @@ int main(int argc, char **argv)
case 'w':
warn_unresolved = 1;
break;
+ case 'M':
+ markers_write = optarg;
+ break;
+ case 'K':
+ markers_read = optarg;
+ break;
default:
exit(1);
}
@@ -2157,5 +2316,11 @@ int main(int argc, char **argv)
"'make CONFIG_DEBUG_SECTION_MISMATCH=y'\n",
sec_mismatch_count);
+ if (markers_read)
+ read_markers(markers_read);
+
+ if (markers_write)
+ write_markers(markers_write);
+
return err;
}
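
Given the sample modules added earlier in this patch, the written Module.markers
is a tab-separated, three-column list; illustratively (the module column comes
from mod->name, so the exact spelling may differ):

	subsystem_event	samples/markers/marker-example	integer %d string %s
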
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 0388cfccac8..dbde650961e 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -112,6 +112,8 @@ struct module {
int has_init;
int has_cleanup;
struct buffer dev_table_buf;
+ char **markers;
+ size_t nmarkers;
char srcversion[25];
};
@@ -126,6 +128,7 @@ struct elf_info {
Elf_Section export_gpl_sec;
Elf_Section export_unused_gpl_sec;
Elf_Section export_gpl_future_sec;
+ Elf_Section markers_strings_sec;
const char *strtab;
char *modinfo;
unsigned int modinfo_len;