aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-record.txt3
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/arch/s390/Makefile4
-rw-r--r--tools/perf/arch/s390/util/dwarf-regs.c22
-rw-r--r--tools/perf/builtin-record.c9
-rw-r--r--tools/perf/builtin-sched.c23
-rw-r--r--tools/perf/builtin-stat.c6
-rw-r--r--tools/perf/builtin-test.c116
-rw-r--r--tools/perf/builtin-top.c3
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/util/evsel.c87
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/parse-events.c74
-rw-r--r--tools/perf/util/session.c2
-rw-r--r--tools/power/x86/turbostat/Makefile8
-rw-r--r--tools/power/x86/turbostat/turbostat.8172
-rw-r--r--tools/power/x86/turbostat/turbostat.c1048
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile8
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8104
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c325
-rw-r--r--tools/slub/slabinfo.c1364
-rwxr-xr-xtools/testing/ktest/compare-ktest-sample.pl30
-rwxr-xr-xtools/testing/ktest/ktest.pl2023
-rw-r--r--tools/testing/ktest/sample.conf622
-rw-r--r--tools/virtio/Makefile12
-rw-r--r--tools/virtio/linux/device.h2
-rw-r--r--tools/virtio/linux/slab.h2
-rw-r--r--tools/virtio/linux/virtio.h223
-rw-r--r--tools/virtio/vhost_test/Makefile2
-rw-r--r--tools/virtio/vhost_test/vhost_test.c1
-rw-r--r--tools/virtio/virtio_test.c248
31 files changed, 6460 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 52462ae2645..e032716c839 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -61,6 +61,9 @@ OPTIONS
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.
+-D::
+--no-delay::
+ Collect data without buffering.
-A::
--append::
Append to the output file to do incremental profiling.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1b9b13ee2a7..2b5387d53ba 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
CFLAGS_OPTIMIZE = -O6
endif
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
EXTLIBS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
ALL_LDFLAGS = $(LDFLAGS)
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
new file mode 100644
index 00000000000..15130b50dfe
--- /dev/null
+++ b/tools/perf/arch/s390/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
new file mode 100644
index 00000000000..e19653e025f
--- /dev/null
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,22 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <libio.h>
+#include <dwarf-regs.h>
+
+#define NUM_GPRS 16
+
+static const char *gpr_names[NUM_GPRS] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n >= NUM_GPRS) ? NULL : gpr_names[n];
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc04903548..df6064ad9bf 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static int pipe_output = 0;
static const char *output_name = "perf.data";
static int group = 0;
static int realtime_prio = 0;
+static bool nodelay = false;
static bool raw_samples = false;
static bool sample_id_all_avail = true;
static bool system_wide = false;
@@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
attr->sample_type |= PERF_SAMPLE_CPU;
}
+ if (nodelay) {
+ attr->watermark = 0;
+ attr->wakeup_events = 1;
+ }
+
attr->mmap = track;
attr->comm = track;
attr->inherit = !no_inherit;
@@ -477,6 +483,7 @@ static void atexit_header(void)
process_buildids();
perf_header__write(&session->header, output, true);
perf_session__delete(session);
+ perf_evsel_list__delete();
symbol__exit();
}
}
@@ -842,6 +849,8 @@ const struct option record_options[] = {
"record events on existing thread id"),
OPT_INTEGER('r', "realtime", &realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
+ OPT_BOOLEAN('D', "no-delay", &nodelay,
+ "collect data without buffering"),
OPT_BOOLEAN('R', "raw-samples", &raw_samples,
"collect raw sample records from all opened counters"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7a4ebeb8b01..29e7ffd8569 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -489,7 +489,8 @@ static void create_tasks(void)
err = pthread_attr_init(&attr);
BUG_ON(err);
- err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
+ err = pthread_attr_setstacksize(&attr,
+ (size_t) max(16 * 1024, PTHREAD_STACK_MIN));
BUG_ON(err);
err = pthread_mutex_lock(&start_work_mutex);
BUG_ON(err);
@@ -1842,15 +1843,15 @@ static const char *record_args[] = {
"-f",
"-m", "1024",
"-c", "1",
- "-e", "sched:sched_switch:r",
- "-e", "sched:sched_stat_wait:r",
- "-e", "sched:sched_stat_sleep:r",
- "-e", "sched:sched_stat_iowait:r",
- "-e", "sched:sched_stat_runtime:r",
- "-e", "sched:sched_process_exit:r",
- "-e", "sched:sched_process_fork:r",
- "-e", "sched:sched_wakeup:r",
- "-e", "sched:sched_migrate_task:r",
+ "-e", "sched:sched_switch",
+ "-e", "sched:sched_stat_wait",
+ "-e", "sched:sched_stat_sleep",
+ "-e", "sched:sched_stat_iowait",
+ "-e", "sched:sched_stat_runtime",
+ "-e", "sched:sched_process_exit",
+ "-e", "sched:sched_process_fork",
+ "-e", "sched:sched_wakeup",
+ "-e", "sched:sched_migrate_task",
};
static int __cmd_record(int argc, const char **argv)
@@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv)
rec_argc = ARRAY_SIZE(record_args) + argc - 1;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- if (rec_argv)
+ if (rec_argv == NULL)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 02b2d8013a6..0ff11d9b13b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv)
"\t Consider tweaking"
" /proc/sys/kernel/perf_event_paranoid or running as root.",
system_wide ? "system-wide " : "");
+ } else if (errno == ENOENT) {
+ error("%s event is not supported. ", event_name(counter));
} else {
error("open_counter returned with %d (%s). "
"/bin/dmesg may provide additional information.\n",
@@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
nr_counters = ARRAY_SIZE(default_attrs);
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(default_attrs[c].type,
- default_attrs[c].config,
+ pos = perf_evsel__new(&default_attrs[c],
nr_counters);
if (pos == NULL)
goto out;
@@ -742,6 +743,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
out_free_fd:
list_for_each_entry(pos, &evsel_list, node)
perf_evsel__free_stat_priv(pos);
+ perf_evsel_list__delete();
out:
thread_map__delete(threads);
threads = NULL;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1c984342a57..ed5696198d3 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -234,6 +234,7 @@ out:
return err;
}
+#include "util/cpumap.h"
#include "util/evsel.h"
#include <sys/types.h>
@@ -264,6 +265,7 @@ static int test__open_syscall_event(void)
int err = -1, fd;
struct thread_map *threads;
struct perf_evsel *evsel;
+ struct perf_event_attr attr;
unsigned int nr_open_calls = 111, i;
int id = trace_event__id("sys_enter_open");
@@ -278,7 +280,10 @@ static int test__open_syscall_event(void)
return -1;
}
- evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.config = id;
+ evsel = perf_evsel__new(&attr, 0);
if (evsel == NULL) {
pr_debug("perf_evsel__new\n");
goto out_thread_map_delete;
@@ -317,6 +322,111 @@ out_thread_map_delete:
return err;
}
+#include <sched.h>
+
+static int test__open_syscall_event_on_all_cpus(void)
+{
+ int err = -1, fd, cpu;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ unsigned int nr_open_calls = 111, i;
+ cpu_set_t *cpu_set;
+ size_t cpu_set_size;
+ int id = trace_event__id("sys_enter_open");
+
+ if (id < 0) {
+ pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+ return -1;
+ }
+
+ threads = thread_map__new(-1, getpid());
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpu_set = CPU_ALLOC(cpus->nr);
+
+ if (cpu_set == NULL)
+ goto out_thread_map_delete;
+
+ cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.config = id;
+ evsel = perf_evsel__new(&attr, 0);
+ if (evsel == NULL) {
+ pr_debug("perf_evsel__new\n");
+ goto out_cpu_free;
+ }
+
+ if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ strerror(errno));
+ goto out_evsel_delete;
+ }
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int ncalls = nr_open_calls + cpu;
+
+ CPU_SET(cpu, cpu_set);
+ sched_setaffinity(0, cpu_set_size, cpu_set);
+ for (i = 0; i < ncalls; ++i) {
+ fd = open("/etc/passwd", O_RDONLY);
+ close(fd);
+ }
+ CPU_CLR(cpu, cpu_set);
+ }
+
+ /*
+ * Here we need to explicitely preallocate the counts, as if
+ * we use the auto allocation it will allocate just for 1 cpu,
+ * as we start by cpu 0.
+ */
+ if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+ pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+ goto out_close_fd;
+ }
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int expected;
+
+ if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("perf_evsel__open_read_on_cpu\n");
+ goto out_close_fd;
+ }
+
+ expected = nr_open_calls + cpu;
+ if (evsel->counts->cpu[cpu].val != expected) {
+ pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+ expected, cpu, evsel->counts->cpu[cpu].val);
+ goto out_close_fd;
+ }
+ }
+
+ err = 0;
+out_close_fd:
+ perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+ perf_evsel__delete(evsel);
+out_cpu_free:
+ CPU_FREE(cpu_set);
+out_thread_map_delete:
+ thread_map__delete(threads);
+ return err;
+}
+
static struct test {
const char *desc;
int (*func)(void);
@@ -330,6 +440,10 @@ static struct test {
.func = test__open_syscall_event,
},
{
+ .desc = "detect open syscall event on all cpus",
+ .func = test__open_syscall_event_on_all_cpus,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1e67ab9c7eb..05344c6210a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1471,6 +1471,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
pos->attr.sample_period = default_interval;
}
+ sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+
symbol_conf.priv_size = (sizeof(struct sym_entry) +
(nr_counters + 1) * sizeof(unsigned long));
@@ -1488,6 +1490,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
out_free_fd:
list_for_each_entry(pos, &evsel_list, node)
perf_evsel__free_mmap(pos);
+ perf_evsel_list__delete();
return status;
}
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 5b1ecd66bb3..595d0f4a710 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,8 +286,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
status = p->fn(argc, argv, prefix);
exit_browser(status);
- perf_evsel_list__delete();
-
if (status)
return status & 0xff;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c95267e63c5..f5cfed60af9 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -6,14 +6,13 @@
#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
if (evsel != NULL) {
evsel->idx = idx;
- evsel->attr.type = type;
- evsel->attr.config = config;
+ evsel->attr = *attr;
INIT_LIST_HEAD(&evsel->node);
}
@@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel,
return 0;
}
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads)
{
- int cpu;
+ int cpu, thread;
- if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+ if (evsel->fd == NULL &&
+ perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
return -1;
for (cpu = 0; cpu < cpus->nr; cpu++) {
- FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
- cpus->map[cpu], -1, 0);
- if (FD(evsel, cpu, 0) < 0)
- goto out_close;
+ for (thread = 0; thread < threads->nr; thread++) {
+ FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
+ threads->map[thread],
+ cpus->map[cpu], -1, 0);
+ if (FD(evsel, cpu, thread) < 0)
+ goto out_close;
+ }
}
return 0;
out_close:
- while (--cpu >= 0) {
- close(FD(evsel, cpu, 0));
- FD(evsel, cpu, 0) = -1;
- }
+ do {
+ while (--thread >= 0) {
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+ thread = threads->nr;
+ } while (--cpu >= 0);
return -1;
}
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+static struct {
+ struct cpu_map map;
+ int cpus[1];
+} empty_cpu_map = {
+ .map.nr = 1,
+ .cpus = { -1, },
+};
+
+static struct {
+ struct thread_map map;
+ int threads[1];
+} empty_thread_map = {
+ .map.nr = 1,
+ .threads = { -1, },
+};
+
+int perf_evsel__open(struct perf_evsel *evsel,
+ struct cpu_map *cpus, struct thread_map *threads)
{
- int thread;
-
- if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
- return -1;
- for (thread = 0; thread < threads->nr; thread++) {
- FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
- threads->map[thread], -1, -1, 0);
- if (FD(evsel, 0, thread) < 0)
- goto out_close;
+ if (cpus == NULL) {
+ /* Work around old compiler warnings about strict aliasing */
+ cpus = &empty_cpu_map.map;
}
- return 0;
+ if (threads == NULL)
+ threads = &empty_thread_map.map;
-out_close:
- while (--thread >= 0) {
- close(FD(evsel, 0, thread));
- FD(evsel, 0, thread) = -1;
- }
- return -1;
+ return __perf_evsel__open(evsel, cpus, threads);
}
-int perf_evsel__open(struct perf_evsel *evsel,
- struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
{
- if (threads == NULL)
- return perf_evsel__open_per_cpu(evsel, cpus);
+ return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+}
- return perf_evsel__open_per_thread(evsel, threads);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+ return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a0ccd69c3fc..b2d755fe88a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
struct cpu_map;
struct thread_map;
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 649083f27e0..5cb6f4bde90 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
return EVT_HANDLED_ALL;
}
+static int store_event_type(const char *orgname)
+{
+ char filename[PATH_MAX], *c;
+ FILE *file;
+ int id, n;
+
+ sprintf(filename, "%s/", debugfs_path);
+ strncat(filename, orgname, strlen(orgname));
+ strcat(filename, "/id");
+
+ c = strchr(filename, ':');
+ if (c)
+ *c = '/';
+
+ file = fopen(filename, "r");
+ if (!file)
+ return 0;
+ n = fscanf(file, "%i", &id);
+ fclose(file);
+ if (n < 1) {
+ pr_err("cannot store event ID\n");
+ return -EINVAL;
+ }
+ return perf_header__push_event(id, orgname);
+}
static enum event_result parse_tracepoint_event(const char **strp,
struct perf_event_attr *attr)
@@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp,
*strp += strlen(sys_name) + evt_length;
return parse_multiple_tracepoint_event(sys_name, evt_name,
flags);
- } else
+ } else {
+ if (store_event_type(evt_name) < 0)
+ return EVT_FAILED;
+
return parse_single_tracepoint_event(sys_name, evt_name,
evt_length, attr, strp);
+ }
}
static enum event_result
@@ -778,41 +807,11 @@ modifier:
return ret;
}
-static int store_event_type(const char *orgname)
-{
- char filename[PATH_MAX], *c;
- FILE *file;
- int id, n;
-
- sprintf(filename, "%s/", debugfs_path);
- strncat(filename, orgname, strlen(orgname));
- strcat(filename, "/id");
-
- c = strchr(filename, ':');
- if (c)
- *c = '/';
-
- file = fopen(filename, "r");
- if (!file)
- return 0;
- n = fscanf(file, "%i", &id);
- fclose(file);
- if (n < 1) {
- pr_err("cannot store event ID\n");
- return -EINVAL;
- }
- return perf_header__push_event(id, orgname);
-}
-
int parse_events(const struct option *opt __used, const char *str, int unset __used)
{
struct perf_event_attr attr;
enum event_result ret;
- if (strchr(str, ':'))
- if (store_event_type(str) < 0)
- return -1;
-
for (;;) {
memset(&attr, 0, sizeof(attr));
ret = parse_event_symbols(&str, &attr);
@@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
if (ret != EVT_HANDLED_ALL) {
struct perf_evsel *evsel;
- evsel = perf_evsel__new(attr.type, attr.config,
+ evsel = perf_evsel__new(&attr,
nr_counters);
if (evsel == NULL)
return -1;
@@ -1014,8 +1013,15 @@ void print_events(void)
int perf_evsel_list__create_default(void)
{
- struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
- PERF_COUNT_HW_CPU_CYCLES, 0);
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+
+ evsel = perf_evsel__new(&attr, 0);
+
if (evsel == NULL)
return -ENOMEM;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6fb4694d05f..313dac2d94c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ more:
if (size == 0)
size = 8;
- if (head + event->header.size >= mmap_size) {
+ if (head + event->header.size > mmap_size) {
if (mmaps[map_idx]) {
munmap(mmaps[map_idx], mmap_size);
mmaps[map_idx] = NULL;
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
new file mode 100644
index 00000000000..fd8e1f1297a
--- /dev/null
+++ b/tools/power/x86/turbostat/Makefile
@@ -0,0 +1,8 @@
+turbostat : turbostat.c
+
+clean :
+ rm -f turbostat
+
+install :
+ install turbostat /usr/bin/turbostat
+ install turbostat.8 /usr/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
new file mode 100644
index 00000000000..ff75125deed
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -0,0 +1,172 @@
+.TH TURBOSTAT 8
+.SH NAME
+turbostat \- Report processor frequency and idle statistics
+.SH SYNOPSIS
+.ft B
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB command
+.br
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB [ "\-i interval_sec" ]
+.SH DESCRIPTION
+\fBturbostat \fP reports processor topology, frequency
+and idle power state statistics on modern X86 processors.
+Either \fBcommand\fP is forked and statistics are printed
+upon its completion, or statistics are printed periodically.
+
+\fBturbostat \fP
+requires that the processor
+supports an "invariant" TSC, plus the APERF and MPERF MSRs.
+\fBturbostat \fP will report idle cpu power state residency
+on processors that additionally support C-state residency counters.
+
+.SS Options
+The \fB-v\fP option increases verbosity.
+.PP
+The \fB-M MSR#\fP option dumps the specified MSR,
+in addition to the usual frequency and idle statistics.
+.PP
+The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds.
+The default is 5 seconds.
+.PP
+The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit,
+displays the statistics gathered since it was forked.
+.PP
+.SH FIELD DESCRIPTIONS
+.nf
+\fBpkg\fP processor package number.
+\fBcore\fP processor core number.
+\fBCPU\fP Linux CPU (logical processor) number.
+\fB%c0\fP percent of the interval that the CPU retired instructions.
+\fBGHz\fP average clock rate while the CPU was in c0 state.
+\fBTSC\fP average GHz that the TSC ran during the entire interval.
+\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states.
+\fB%pc3, %pc6\fP percentage residency in hardware package idle states.
+.fi
+.PP
+.SH EXAMPLE
+Without any parameters, turbostat prints out counters ever 5 seconds.
+(override interval with "-i sec" option, or specify a command
+for turbostat to fork).
+
+The first row of statistics reflect the average for the entire system.
+Subsequent rows show per-CPU statistics.
+
+.nf
+[root@x980]# ./turbostat
+core CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
+ 0.04 1.62 3.38 0.11 0.00 99.85 0.00 95.07
+ 0 0 0.04 1.62 3.38 0.06 0.00 99.90 0.00 95.07
+ 0 6 0.02 1.62 3.38 0.08 0.00 99.90 0.00 95.07
+ 1 2 0.10 1.62 3.38 0.29 0.00 99.61 0.00 95.07
+ 1 8 0.11 1.62 3.38 0.28 0.00 99.61 0.00 95.07
+ 2 4 0.01 1.62 3.38 0.01 0.00 99.98 0.00 95.07
+ 2 10 0.01 1.61 3.38 0.02 0.00 99.98 0.00 95.07
+ 8 1 0.07 1.62 3.38 0.15 0.00 99.78 0.00 95.07
+ 8 7 0.03 1.62 3.38 0.19 0.00 99.78 0.00 95.07
+ 9 3 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
+ 9 9 0.01 1.62 3.38 0.02 0.00 99.98 0.00 95.07
+ 10 5 0.01 1.62 3.38 0.13 0.00 99.86 0.00 95.07
+ 10 11 0.08 1.62 3.38 0.05 0.00 99.86 0.00 95.07
+.fi
+.SH VERBOSE EXAMPLE
+The "-v" option adds verbosity to the output:
+
+.nf
+GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
+12 * 133 = 1600 MHz max efficiency
+25 * 133 = 3333 MHz TSC frequency
+26 * 133 = 3467 MHz max turbo 4 active cores
+26 * 133 = 3467 MHz max turbo 3 active cores
+27 * 133 = 3600 MHz max turbo 2 active cores
+27 * 133 = 3600 MHz max turbo 1 active cores
+
+.fi
+The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
+available at the minimum package voltage. The \fBTSC frequency\fP is the nominal
+maximum frequency of the processor if turbo-mode were not available. This frequency
+should be sustainable on all CPUs indefinitely, given nominal power and cooling.
+The remaining rows show what maximum turbo frequency is possible
+depending on the number of idle cores. Note that this information is
+not available on all processors.
+.SH FORK EXAMPLE
+If turbostat is invoked with a command, it will fork that command
+and output the statistics gathered when the command exits.
+eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
+until ^C while the other CPUs are mostly idle:
+
+.nf
+[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
+
+^Ccore CPU %c0 GHz TSC %c1 %c3 %c6 %pc3 %pc6
+ 8.49 3.63 3.38 16.23 0.66 74.63 0.00 0.00
+ 0 0 1.22 3.62 3.38 32.18 0.00 66.60 0.00 0.00
+ 0 6 0.40 3.61 3.38 33.00 0.00 66.60 0.00 0.00
+ 1 2 0.11 3.14 3.38 0.19 3.95 95.75 0.00 0.00
+ 1 8 0.05 2.88 3.38 0.25 3.95 95.75 0.00 0.00
+ 2 4 0.00 3.13 3.38 0.02 0.00 99.98 0.00 0.00
+ 2 10 0.00 3.09 3.38 0.02 0.00 99.98 0.00 0.00
+ 8 1 0.04 3.50 3.38 14.43 0.00 85.54 0.00 0.00
+ 8 7 0.03 2.98 3.38 14.43 0.00 85.54 0.00 0.00
+ 9 3 0.00 3.16 3.38 100.00 0.00 0.00 0.00 0.00
+ 9 9 99.93 3.63 3.38 0.06 0.00 0.00 0.00 0.00
+ 10 5 0.01 2.82 3.38 0.08 0.00 99.91 0.00 0.00
+ 10 11 0.02 3.36 3.38 0.06 0.00 99.91 0.00 0.00
+6.950866 sec
+
+.fi
+Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit
+while the other processors are generally in various states of idle.
+
+Note that cpu3 is an HT sibling sharing core9
+with cpu9, and thus it is unable to get to an idle state
+deeper than c1 while cpu9 is busy.
+
+Note that turbostat reports average GHz of 3.61, while
+the arithmetic average of the GHz column above is 3.24.
+This is a weighted average, where the weight is %c0. ie. it is the total number of
+un-halted cycles elapsed per time divided by the number of CPUs.
+.SH NOTES
+
+.B "turbostat "
+must be run as root.
+
+.B "turbostat "
+reads hardware counters, but doesn't write them.
+So it will not interfere with the OS or other programs, including
+multiple invocations of itself.
+
+\fBturbostat \fP
+may work poorly on Linux-2.6.20 through 2.6.29,
+as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
+in those kernels.
+
+The APERF, MPERF MSRs are defined to count non-halted cycles.
+Although it is not guaranteed by the architecture, turbostat assumes
+that they count at TSC rate, which is true on all processors tested to date.
+
+.SH REFERENCES
+"IntelÂŽ Turbo Boost Technology
+in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
+http://download.intel.com/design/processor/applnots/320354.pdf
+
+"IntelÂŽ 64 and IA-32 Architectures Software Developer's Manual
+Volume 3B: System Programming Guide"
+http://www.intel.com/products/processor/manuals/
+
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+
+.SH "SEE ALSO"
+msr(4), vmstat(8)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
new file mode 100644
index 00000000000..4c6983de6fd
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -0,0 +1,1048 @@
+/*
+ * turbostat -- show CPU frequency and C-state residency
+ * on modern Intel turbo-capable processors.
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+
+#define MSR_TSC 0x10
+#define MSR_NEHALEM_PLATFORM_INFO 0xCE
+#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD
+#define MSR_APERF 0xE8
+#define MSR_MPERF 0xE7
+#define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */
+#define MSR_PKG_C3_RESIDENCY 0x3F8
+#define MSR_PKG_C6_RESIDENCY 0x3F9
+#define MSR_PKG_C7_RESIDENCY 0x3FA /* SNB only */
+#define MSR_CORE_C3_RESIDENCY 0x3FC
+#define MSR_CORE_C6_RESIDENCY 0x3FD
+#define MSR_CORE_C7_RESIDENCY 0x3FE /* SNB only */
+
+char *proc_stat = "/proc/stat";
+unsigned int interval_sec = 5; /* set with -i interval_sec */
+unsigned int verbose; /* set with -v */
+unsigned int skip_c0;
+unsigned int skip_c1;
+unsigned int do_nhm_cstates;
+unsigned int do_snb_cstates;
+unsigned int has_aperf;
+unsigned int units = 1000000000; /* Ghz etc */
+unsigned int genuine_intel;
+unsigned int has_invariant_tsc;
+unsigned int do_nehalem_platform_info;
+unsigned int do_nehalem_turbo_ratio_limit;
+unsigned int extra_msr_offset;
+double bclk;
+unsigned int show_pkg;
+unsigned int show_core;
+unsigned int show_cpu;
+
+int aperf_mperf_unstable;
+int backwards_count;
+char *progname;
+int need_reinitialize;
+
+int num_cpus;
+
+typedef struct per_cpu_counters {
+ unsigned long long tsc; /* per thread */
+ unsigned long long aperf; /* per thread */
+ unsigned long long mperf; /* per thread */
+ unsigned long long c1; /* per thread (calculated) */
+ unsigned long long c3; /* per core */
+ unsigned long long c6; /* per core */
+ unsigned long long c7; /* per core */
+ unsigned long long pc2; /* per package */
+ unsigned long long pc3; /* per package */
+ unsigned long long pc6; /* per package */
+ unsigned long long pc7; /* per package */
+ unsigned long long extra_msr; /* per thread */
+ int pkg;
+ int core;
+ int cpu;
+ struct per_cpu_counters *next;
+} PCC;
+
+PCC *pcc_even;
+PCC *pcc_odd;
+PCC *pcc_delta;
+PCC *pcc_average;
+struct timeval tv_even;
+struct timeval tv_odd;
+struct timeval tv_delta;
+
+unsigned long long get_msr(int cpu, off_t offset)
+{
+ ssize_t retval;
+ unsigned long long msr;
+ char pathname[32];
+ int fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDONLY);
+ if (fd < 0) {
+ perror(pathname);
+ need_reinitialize = 1;
+ return 0;
+ }
+
+ retval = pread(fd, &msr, sizeof msr, offset);
+ if (retval != sizeof msr) {
+ fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n",
+ cpu, offset, retval);
+ exit(-2);
+ }
+
+ close(fd);
+ return msr;
+}
+
+void print_header()
+{
+ if (show_pkg)
+ fprintf(stderr, "pkg ");
+ if (show_core)
+ fprintf(stderr, "core");
+ if (show_cpu)
+ fprintf(stderr, " CPU");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%c0 ");
+ if (has_aperf)
+ fprintf(stderr, " GHz");
+ fprintf(stderr, " TSC");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%c1 ");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%c3 ");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%c6 ");
+ if (do_snb_cstates)
+ fprintf(stderr, " %%c7 ");
+ if (do_snb_cstates)
+ fprintf(stderr, " %%pc2 ");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%pc3 ");
+ if (do_nhm_cstates)
+ fprintf(stderr, " %%pc6 ");
+ if (do_snb_cstates)
+ fprintf(stderr, " %%pc7 ");
+ if (extra_msr_offset)
+ fprintf(stderr, " MSR 0x%x ", extra_msr_offset);
+
+ putc('\n', stderr);
+}
+
+void dump_pcc(PCC *pcc)
+{
+ fprintf(stderr, "package: %d ", pcc->pkg);
+ fprintf(stderr, "core:: %d ", pcc->core);
+ fprintf(stderr, "CPU: %d ", pcc->cpu);
+ fprintf(stderr, "TSC: %016llX\n", pcc->tsc);
+ fprintf(stderr, "c3: %016llX\n", pcc->c3);
+ fprintf(stderr, "c6: %016llX\n", pcc->c6);
+ fprintf(stderr, "c7: %016llX\n", pcc->c7);
+ fprintf(stderr, "aperf: %016llX\n", pcc->aperf);
+ fprintf(stderr, "pc2: %016llX\n", pcc->pc2);
+ fprintf(stderr, "pc3: %016llX\n", pcc->pc3);
+ fprintf(stderr, "pc6: %016llX\n", pcc->pc6);
+ fprintf(stderr, "pc7: %016llX\n", pcc->pc7);
+ fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr);
+}
+
+void dump_list(PCC *pcc)
+{
+ printf("dump_list 0x%p\n", pcc);
+
+ for (; pcc; pcc = pcc->next)
+ dump_pcc(pcc);
+}
+
+void print_pcc(PCC *p)
+{
+ double interval_float;
+
+ interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+
+ /* topology columns, print blanks on 1st (average) line */
+ if (p == pcc_average) {
+ if (show_pkg)
+ fprintf(stderr, " ");
+ if (show_core)
+ fprintf(stderr, " ");
+ if (show_cpu)
+ fprintf(stderr, " ");
+ } else {
+ if (show_pkg)
+ fprintf(stderr, "%4d", p->pkg);
+ if (show_core)
+ fprintf(stderr, "%4d", p->core);
+ if (show_cpu)
+ fprintf(stderr, "%4d", p->cpu);
+ }
+
+ /* %c0 */
+ if (do_nhm_cstates) {
+ if (!skip_c0)
+ fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc);
+ else
+ fprintf(stderr, " ****");
+ }
+
+ /* GHz */
+ if (has_aperf) {
+ if (!aperf_mperf_unstable) {
+ fprintf(stderr, "%5.2f",
+ 1.0 * p->tsc / units * p->aperf /
+ p->mperf / interval_float);
+ } else {
+ if (p->aperf > p->tsc || p->mperf > p->tsc) {
+ fprintf(stderr, " ****");
+ } else {
+ fprintf(stderr, "%4.1f*",
+ 1.0 * p->tsc /
+ units * p->aperf /
+ p->mperf / interval_float);
+ }
+ }
+ }
+
+ /* TSC */
+ fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float);
+
+ if (do_nhm_cstates) {
+ if (!skip_c1)
+ fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
+ else
+ fprintf(stderr, " ****");
+ }
+ if (do_nhm_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
+ if (do_nhm_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
+ if (do_snb_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
+ if (do_snb_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
+ if (do_nhm_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
+ if (do_nhm_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
+ if (do_snb_cstates)
+ fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
+ if (extra_msr_offset)
+ fprintf(stderr, " 0x%016llx", p->extra_msr);
+ putc('\n', stderr);
+}
+
+void print_counters(PCC *cnt)
+{
+ PCC *pcc;
+
+ print_header();
+
+ if (num_cpus > 1)
+ print_pcc(pcc_average);
+
+ for (pcc = cnt; pcc != NULL; pcc = pcc->next)
+ print_pcc(pcc);
+
+}
+
+#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after))
+
+
+int compute_delta(PCC *after, PCC *before, PCC *delta)
+{
+ int errors = 0;
+ int perf_err = 0;
+
+ skip_c0 = skip_c1 = 0;
+
+ for ( ; after && before && delta;
+ after = after->next, before = before->next, delta = delta->next) {
+ if (before->cpu != after->cpu) {
+ printf("cpu configuration changed: %d != %d\n",
+ before->cpu, after->cpu);
+ return -1;
+ }
+
+ if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
+ fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
+ before->cpu, before->tsc, after->tsc);
+ errors++;
+ }
+ /* check for TSC < 1 Mcycles over interval */
+ if (delta->tsc < (1000 * 1000)) {
+ fprintf(stderr, "Insanely slow TSC rate,"
+ " TSC stops in idle?\n");
+ fprintf(stderr, "You can disable all c-states"
+ " by booting with \"idle=poll\"\n");
+ fprintf(stderr, "or just the deep ones with"
+ " \"processor.max_cstate=1\"\n");
+ exit(-3);
+ }
+ if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
+ fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
+ before->cpu, before->c3, after->c3);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
+ fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
+ before->cpu, before->c6, after->c6);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
+ fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
+ before->cpu, before->c7, after->c7);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
+ fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
+ before->cpu, before->pc2, after->pc2);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
+ fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
+ before->cpu, before->pc3, after->pc3);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
+ fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
+ before->cpu, before->pc6, after->pc6);
+ errors++;
+ }
+ if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
+ fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
+ before->cpu, before->pc7, after->pc7);
+ errors++;
+ }
+
+ perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf);
+ if (perf_err) {
+ fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
+ before->cpu, before->aperf, after->aperf);
+ }
+ perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
+ if (perf_err) {
+ fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
+ before->cpu, before->mperf, after->mperf);
+ }
+ if (perf_err) {
+ if (!aperf_mperf_unstable) {
+ fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
+ fprintf(stderr, "* Frequency results do not cover entire interval *\n");
+ fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+
+ aperf_mperf_unstable = 1;
+ }
+ /*
+ * mperf delta is likely a huge "positive" number
+ * can not use it for calculating c0 time
+ */
+ skip_c0 = 1;
+ skip_c1 = 1;
+ }
+
+ /*
+ * As mperf and tsc collection are not atomic,
+ * it is possible for mperf's non-halted cycles
+ * to exceed TSC's all cycles: show c1 = 0% in that case.
+ */
+ if (delta->mperf > delta->tsc)
+ delta->c1 = 0;
+ else /* normal case, derive c1 */
+ delta->c1 = delta->tsc - delta->mperf
+ - delta->c3 - delta->c6 - delta->c7;
+
+ if (delta->mperf == 0)
+ delta->mperf = 1; /* divide by 0 protection */
+
+ /*
+ * for "extra msr", just copy the latest w/o subtracting
+ */
+ delta->extra_msr = after->extra_msr;
+ if (errors) {
+ fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
+ dump_pcc(before);
+ fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
+ dump_pcc(after);
+ errors = 0;
+ }
+ }
+ return 0;
+}
+
+void compute_average(PCC *delta, PCC *avg)
+{
+ PCC *sum;
+
+ sum = calloc(1, sizeof(PCC));
+ if (sum == NULL) {
+ perror("calloc sum");
+ exit(1);
+ }
+
+ for (; delta; delta = delta->next) {
+ sum->tsc += delta->tsc;
+ sum->c1 += delta->c1;
+ sum->c3 += delta->c3;
+ sum->c6 += delta->c6;
+ sum->c7 += delta->c7;
+ sum->aperf += delta->aperf;
+ sum->mperf += delta->mperf;
+ sum->pc2 += delta->pc2;
+ sum->pc3 += delta->pc3;
+ sum->pc6 += delta->pc6;
+ sum->pc7 += delta->pc7;
+ }
+ avg->tsc = sum->tsc/num_cpus;
+ avg->c1 = sum->c1/num_cpus;
+ avg->c3 = sum->c3/num_cpus;
+ avg->c6 = sum->c6/num_cpus;
+ avg->c7 = sum->c7/num_cpus;
+ avg->aperf = sum->aperf/num_cpus;
+ avg->mperf = sum->mperf/num_cpus;
+ avg->pc2 = sum->pc2/num_cpus;
+ avg->pc3 = sum->pc3/num_cpus;
+ avg->pc6 = sum->pc6/num_cpus;
+ avg->pc7 = sum->pc7/num_cpus;
+
+ free(sum);
+}
+
+void get_counters(PCC *pcc)
+{
+ for ( ; pcc; pcc = pcc->next) {
+ pcc->tsc = get_msr(pcc->cpu, MSR_TSC);
+ if (do_nhm_cstates)
+ pcc->c3 = get_msr(pcc->cpu, MSR_CORE_C3_RESIDENCY);
+ if (do_nhm_cstates)
+ pcc->c6 = get_msr(pcc->cpu, MSR_CORE_C6_RESIDENCY);
+ if (do_snb_cstates)
+ pcc->c7 = get_msr(pcc->cpu, MSR_CORE_C7_RESIDENCY);
+ if (has_aperf)
+ pcc->aperf = get_msr(pcc->cpu, MSR_APERF);
+ if (has_aperf)
+ pcc->mperf = get_msr(pcc->cpu, MSR_MPERF);
+ if (do_snb_cstates)
+ pcc->pc2 = get_msr(pcc->cpu, MSR_PKG_C2_RESIDENCY);
+ if (do_nhm_cstates)
+ pcc->pc3 = get_msr(pcc->cpu, MSR_PKG_C3_RESIDENCY);
+ if (do_nhm_cstates)
+ pcc->pc6 = get_msr(pcc->cpu, MSR_PKG_C6_RESIDENCY);
+ if (do_snb_cstates)
+ pcc->pc7 = get_msr(pcc->cpu, MSR_PKG_C7_RESIDENCY);
+ if (extra_msr_offset)
+ pcc->extra_msr = get_msr(pcc->cpu, extra_msr_offset);
+ }
+}
+
+
+void print_nehalem_info()
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ if (!do_nehalem_platform_info)
+ return;
+
+ msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
+
+ ratio = (msr >> 40) & 0xFF;
+ fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0xFF;
+ fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ if (verbose > 1)
+ fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+
+ if (!do_nehalem_turbo_ratio_limit)
+ return;
+
+ msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
+
+ ratio = (msr >> 24) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+}
+
+void free_counter_list(PCC *list)
+{
+ PCC *p;
+
+ for (p = list; p; ) {
+ PCC *free_me;
+
+ free_me = p;
+ p = p->next;
+ free(free_me);
+ }
+ return;
+}
+
+void free_all_counters(void)
+{
+ free_counter_list(pcc_even);
+ pcc_even = NULL;
+
+ free_counter_list(pcc_odd);
+ pcc_odd = NULL;
+
+ free_counter_list(pcc_delta);
+ pcc_delta = NULL;
+
+ free_counter_list(pcc_average);
+ pcc_average = NULL;
+}
+
+void insert_cpu_counters(PCC **list, PCC *new)
+{
+ PCC *prev;
+
+ /*
+ * list was empty
+ */
+ if (*list == NULL) {
+ new->next = *list;
+ *list = new;
+ return;
+ }
+
+ show_cpu = 1; /* there is more than one CPU */
+
+ /*
+ * insert on front of list.
+ * It is sorted by ascending package#, core#, cpu#
+ */
+ if (((*list)->pkg > new->pkg) ||
+ (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
+ (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
+ new->next = *list;
+ *list = new;
+ return;
+ }
+
+ prev = *list;
+
+ while (prev->next && (prev->next->pkg < new->pkg)) {
+ prev = prev->next;
+ show_pkg = 1; /* there is more than 1 package */
+ }
+
+ while (prev->next && (prev->next->pkg == new->pkg)
+ && (prev->next->core < new->core)) {
+ prev = prev->next;
+ show_core = 1; /* there is more than 1 core */
+ }
+
+ while (prev->next && (prev->next->pkg == new->pkg)
+ && (prev->next->core == new->core)
+ && (prev->next->cpu < new->cpu)) {
+ prev = prev->next;
+ }
+
+ /*
+ * insert after "prev"
+ */
+ new->next = prev->next;
+ prev->next = new;
+
+ return;
+}
+
+void alloc_new_cpu_counters(int pkg, int core, int cpu)
+{
+ PCC *new;
+
+ if (verbose > 1)
+ printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
+
+ new = (PCC *)calloc(1, sizeof(PCC));
+ if (new == NULL) {
+ perror("calloc");
+ exit(1);
+ }
+ new->pkg = pkg;
+ new->core = core;
+ new->cpu = cpu;
+ insert_cpu_counters(&pcc_odd, new);
+
+ new = (PCC *)calloc(1, sizeof(PCC));
+ if (new == NULL) {
+ perror("calloc");
+ exit(1);
+ }
+ new->pkg = pkg;
+ new->core = core;
+ new->cpu = cpu;
+ insert_cpu_counters(&pcc_even, new);
+
+ new = (PCC *)calloc(1, sizeof(PCC));
+ if (new == NULL) {
+ perror("calloc");
+ exit(1);
+ }
+ new->pkg = pkg;
+ new->core = core;
+ new->cpu = cpu;
+ insert_cpu_counters(&pcc_delta, new);
+
+ new = (PCC *)calloc(1, sizeof(PCC));
+ if (new == NULL) {
+ perror("calloc");
+ exit(1);
+ }
+ new->pkg = pkg;
+ new->core = core;
+ new->cpu = cpu;
+ pcc_average = new;
+}
+
+int get_physical_package_id(int cpu)
+{
+ char path[64];
+ FILE *filep;
+ int pkg;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+ filep = fopen(path, "r");
+ if (filep == NULL) {
+ perror(path);
+ exit(1);
+ }
+ fscanf(filep, "%d", &pkg);
+ fclose(filep);
+ return pkg;
+}
+
+int get_core_id(int cpu)
+{
+ char path[64];
+ FILE *filep;
+ int core;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+ filep = fopen(path, "r");
+ if (filep == NULL) {
+ perror(path);
+ exit(1);
+ }
+ fscanf(filep, "%d", &core);
+ fclose(filep);
+ return core;
+}
+
+/*
+ * run func(index, cpu) on every cpu in /proc/stat
+ */
+
+int for_all_cpus(void (func)(int, int, int))
+{
+ FILE *fp;
+ int cpu_count;
+ int retval;
+
+ fp = fopen(proc_stat, "r");
+ if (fp == NULL) {
+ perror(proc_stat);
+ exit(1);
+ }
+
+ retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+ if (retval != 0) {
+ perror("/proc/stat format");
+ exit(1);
+ }
+
+ for (cpu_count = 0; ; cpu_count++) {
+ int cpu;
+
+ retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
+ if (retval != 1)
+ break;
+
+ func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
+ }
+ fclose(fp);
+ return cpu_count;
+}
+
+void re_initialize(void)
+{
+ printf("turbostat: topology changed, re-initializing.\n");
+ free_all_counters();
+ num_cpus = for_all_cpus(alloc_new_cpu_counters);
+ need_reinitialize = 0;
+ printf("num_cpus is now %d\n", num_cpus);
+}
+
+void dummy(int pkg, int core, int cpu) { return; }
+/*
+ * check to see if a cpu came on-line
+ */
+void verify_num_cpus()
+{
+ int new_num_cpus;
+
+ new_num_cpus = for_all_cpus(dummy);
+
+ if (new_num_cpus != num_cpus) {
+ if (verbose)
+ printf("num_cpus was %d, is now %d\n",
+ num_cpus, new_num_cpus);
+ need_reinitialize = 1;
+ }
+
+ return;
+}
+
+void turbostat_loop()
+{
+restart:
+ get_counters(pcc_even);
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+
+ while (1) {
+ verify_num_cpus();
+ if (need_reinitialize) {
+ re_initialize();
+ goto restart;
+ }
+ sleep(interval_sec);
+ get_counters(pcc_odd);
+ gettimeofday(&tv_odd, (struct timezone *)NULL);
+
+ compute_delta(pcc_odd, pcc_even, pcc_delta);
+ timersub(&tv_odd, &tv_even, &tv_delta);
+ compute_average(pcc_delta, pcc_average);
+ print_counters(pcc_delta);
+ if (need_reinitialize) {
+ re_initialize();
+ goto restart;
+ }
+ sleep(interval_sec);
+ get_counters(pcc_even);
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+ compute_delta(pcc_even, pcc_odd, pcc_delta);
+ timersub(&tv_even, &tv_odd, &tv_delta);
+ compute_average(pcc_delta, pcc_average);
+ print_counters(pcc_delta);
+ }
+}
+
+void check_dev_msr()
+{
+ struct stat sb;
+
+ if (stat("/dev/cpu/0/msr", &sb)) {
+ fprintf(stderr, "no /dev/cpu/0/msr\n");
+ fprintf(stderr, "Try \"# modprobe msr\"\n");
+ exit(-5);
+ }
+}
+
+void check_super_user()
+{
+ if (getuid() != 0) {
+ fprintf(stderr, "must be root\n");
+ exit(-6);
+ }
+}
+
+int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case 0x1A: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+ case 0x1E: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+ case 0x1F: /* Core i7 and i5 Processor - Nehalem */
+ case 0x25: /* Westmere Client - Clarkdale, Arrandale */
+ case 0x2C: /* Westmere EP - Gulftown */
+ case 0x2A: /* SNB */
+ case 0x2D: /* SNB Xeon */
+ return 1;
+ case 0x2E: /* Nehalem-EX Xeon - Beckton */
+ case 0x2F: /* Westmere-EX Xeon - Eagleton */
+ default:
+ return 0;
+ }
+}
+
+int is_snb(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case 0x2A:
+ case 0x2D:
+ return 1;
+ }
+ return 0;
+}
+
+double discover_bclk(unsigned int family, unsigned int model)
+{
+ if (is_snb(family, model))
+ return 100.00;
+ else
+ return 133.33;
+}
+
+void check_cpuid()
+{
+ unsigned int eax, ebx, ecx, edx, max_level;
+ unsigned int fms, family, model, stepping;
+
+ eax = ebx = ecx = edx = 0;
+
+ asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
+
+ if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+ genuine_intel = 1;
+
+ if (verbose)
+ fprintf(stderr, "%.4s%.4s%.4s ",
+ (char *)&ebx, (char *)&edx, (char *)&ecx);
+
+ asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ stepping = fms & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
+ if (verbose)
+ fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+ max_level, family, model, stepping, family, model, stepping);
+
+ if (!(edx & (1 << 5))) {
+ fprintf(stderr, "CPUID: no MSR\n");
+ exit(1);
+ }
+
+ /*
+ * check max extended function levels of CPUID.
+ * This is needed to check for invariant TSC.
+ * This check is valid for both Intel and AMD.
+ */
+ ebx = ecx = edx = 0;
+ asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
+
+ if (max_level < 0x80000007) {
+ fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
+ exit(1);
+ }
+
+ /*
+ * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+ * this check is valid for both Intel and AMD
+ */
+ asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
+ has_invariant_tsc = edx && (1 << 8);
+
+ if (!has_invariant_tsc) {
+ fprintf(stderr, "No invariant TSC\n");
+ exit(1);
+ }
+
+ /*
+ * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+ * this check is valid for both Intel and AMD
+ */
+
+ asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
+ has_aperf = ecx && (1 << 0);
+ if (!has_aperf) {
+ fprintf(stderr, "No APERF MSR\n");
+ exit(1);
+ }
+
+ do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
+ do_nhm_cstates = genuine_intel; /* all Intel w/ non-stop TSC have NHM counters */
+ do_snb_cstates = is_snb(family, model);
+ bclk = discover_bclk(family, model);
+
+ do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
+}
+
+
+void usage()
+{
+ fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
+ progname);
+ exit(1);
+}
+
+
+/*
+ * in /dev/cpu/ return success for names that are numbers
+ * ie. filter out ".", "..", "microcode".
+ */
+int dir_filter(const struct dirent *dirp)
+{
+ if (isdigit(dirp->d_name[0]))
+ return 1;
+ else
+ return 0;
+}
+
+int open_dev_cpu_msr(int dummy1)
+{
+ return 0;
+}
+
+void turbostat_init()
+{
+ check_cpuid();
+
+ check_dev_msr();
+ check_super_user();
+
+ num_cpus = for_all_cpus(alloc_new_cpu_counters);
+
+ if (verbose)
+ print_nehalem_info();
+}
+
+int fork_it(char **argv)
+{
+ int retval;
+ pid_t child_pid;
+ get_counters(pcc_even);
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+
+ child_pid = fork();
+ if (!child_pid) {
+ /* child */
+ execvp(argv[0], argv);
+ } else {
+ int status;
+
+ /* parent */
+ if (child_pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGQUIT, SIG_IGN);
+ if (waitpid(child_pid, &status, 0) == -1) {
+ perror("wait");
+ exit(1);
+ }
+ }
+ get_counters(pcc_odd);
+ gettimeofday(&tv_odd, (struct timezone *)NULL);
+ retval = compute_delta(pcc_odd, pcc_even, pcc_delta);
+
+ timersub(&tv_odd, &tv_even, &tv_delta);
+ compute_average(pcc_delta, pcc_average);
+ if (!retval)
+ print_counters(pcc_delta);
+
+ fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);;
+
+ return 0;
+}
+
+void cmdline(int argc, char **argv)
+{
+ int opt;
+
+ progname = argv[0];
+
+ while ((opt = getopt(argc, argv, "+vi:M:")) != -1) {
+ switch (opt) {
+ case 'v':
+ verbose++;
+ break;
+ case 'i':
+ interval_sec = atoi(optarg);
+ break;
+ case 'M':
+ sscanf(optarg, "%x", &extra_msr_offset);
+ if (verbose > 1)
+ fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
+ break;
+ default:
+ usage();
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ cmdline(argc, argv);
+
+ if (verbose > 1)
+ fprintf(stderr, "turbostat Dec 6, 2010"
+ " - Len Brown <lenb@kernel.org>\n");
+ if (verbose > 1)
+ fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
+
+ turbostat_init();
+
+ /*
+ * if any params left, it must be a command to fork
+ */
+ if (argc - optind)
+ return fork_it(argv + optind);
+ else
+ turbostat_loop();
+
+ return 0;
+}
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
new file mode 100644
index 00000000000..f458237fdd7
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -0,0 +1,8 @@
+x86_energy_perf_policy : x86_energy_perf_policy.c
+
+clean :
+ rm -f x86_energy_perf_policy
+
+install :
+ install x86_energy_perf_policy /usr/bin/
+ install x86_energy_perf_policy.8 /usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
new file mode 100644
index 00000000000..8eaaad648cd
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -0,0 +1,104 @@
+.\" This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\" Distributed under the GPL, Copyleft 1994.
+.TH X86_ENERGY_PERF_POLICY 8
+.SH NAME
+x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+.SH SYNOPSIS
+.ft B
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB "\-r"
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'performance'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'normal'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'powersave'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB n
+.br
+.SH DESCRIPTION
+\fBx86_energy_perf_policy\fP
+allows software to convey
+its policy for the relative importance of performance
+versus energy savings to the processor.
+
+The processor uses this information in model-specific ways
+when it must select trade-offs between performance and
+energy efficiency.
+
+This policy hint does not supersede Processor Performance states
+(P-states) or CPU Idle power states (C-states), but allows
+software to have influence where it would otherwise be unable
+to express a preference.
+
+For example, this setting may tell the hardware how
+aggressively or conservatively to control frequency
+in the "turbo range" above the explicitly OS-controlled
+P-state frequency range. It may also tell the hardware
+how aggressively is should enter the OS requested C-states.
+
+Support for this feature is indicated by CPUID.06H.ECX.bit3
+per the Intel Architectures Software Developer's Manual.
+
+.SS Options
+\fB-c\fP limits operation to a single CPU.
+The default is to operate on all CPUs.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
+logical processor, but that the initial implementations
+of the MSR were shared among all processors in each package.
+.PP
+\fB-v\fP increases verbosity. By default
+x86_energy_perf_policy is silent.
+.PP
+\fB-r\fP is for "read-only" mode - the unchanged state
+is read and displayed.
+.PP
+.I performance
+Set a policy where performance is paramount.
+The processor will be unwilling to sacrifice any performance
+for the sake of energy saving. This is the hardware default.
+.PP
+.I normal
+Set a policy with a normal balance between performance and energy efficiency.
+The processor will tolerate minor performance compromise
+for potentially significant energy savings.
+This reasonable default for most desktops and servers.
+.PP
+.I powersave
+Set a policy where the processor can accept
+a measurable performance hit to maximize energy efficiency.
+.PP
+.I n
+Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
+The range of valid numbers is 0-15, where 0 is maximum
+performance and 15 is maximum energy efficiency.
+
+.SH NOTES
+.B "x86_energy_perf_policy "
+runs only as root.
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+
+.SH "SEE ALSO"
+msr(4)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
new file mode 100644
index 00000000000..d9678a34dd7
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -0,0 +1,325 @@
+/*
+ * x86_energy_perf_policy -- set the energy versus performance
+ * policy preference bias on recent X86 processors.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <string.h>
+
+unsigned int verbose; /* set with -v */
+unsigned int read_only; /* set with -r */
+char *progname;
+unsigned long long new_bias;
+int cpu = -1;
+
+/*
+ * Usage:
+ *
+ * -c cpu: limit action to a single CPU (default is all CPUs)
+ * -v: verbose output (can invoke more than once)
+ * -r: read-only, don't change any settings
+ *
+ * performance
+ * Performance is paramount.
+ * Unwilling to sacrafice any performance
+ * for the sake of energy saving. (hardware default)
+ *
+ * normal
+ * Can tolerate minor performance compromise
+ * for potentially significant energy savings.
+ * (reasonable default for most desktops and servers)
+ *
+ * powersave
+ * Can tolerate significant performance hit
+ * to maximize energy savings.
+ *
+ * n
+ * a numerical value to write to the underlying MSR.
+ */
+void usage(void)
+{
+ printf("%s: [-c cpu] [-v] "
+ "(-r | 'performance' | 'normal' | 'powersave' | n)\n",
+ progname);
+ exit(1);
+}
+
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+
+#define BIAS_PERFORMANCE 0
+#define BIAS_BALANCE 6
+#define BIAS_POWERSAVE 15
+
+void cmdline(int argc, char **argv)
+{
+ int opt;
+
+ progname = argv[0];
+
+ while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
+ switch (opt) {
+ case 'c':
+ cpu = atoi(optarg);
+ break;
+ case 'r':
+ read_only = 1;
+ break;
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ }
+ }
+ /* if -r, then should be no additional optind */
+ if (read_only && (argc > optind))
+ usage();
+
+ /*
+ * if no -r , then must be one additional optind
+ */
+ if (!read_only) {
+
+ if (argc != optind + 1) {
+ printf("must supply -r or policy param\n");
+ usage();
+ }
+
+ if (!strcmp("performance", argv[optind])) {
+ new_bias = BIAS_PERFORMANCE;
+ } else if (!strcmp("normal", argv[optind])) {
+ new_bias = BIAS_BALANCE;
+ } else if (!strcmp("powersave", argv[optind])) {
+ new_bias = BIAS_POWERSAVE;
+ } else {
+ char *endptr;
+
+ new_bias = strtoull(argv[optind], &endptr, 0);
+ if (endptr == argv[optind] ||
+ new_bias > BIAS_POWERSAVE) {
+ fprintf(stderr, "invalid value: %s\n",
+ argv[optind]);
+ usage();
+ }
+ }
+ }
+}
+
+/*
+ * validate_cpuid()
+ * returns on success, quietly exits on failure (make verbose with -v)
+ */
+void validate_cpuid(void)
+{
+ unsigned int eax, ebx, ecx, edx, max_level;
+ char brand[16];
+ unsigned int fms, family, model, stepping;
+
+ eax = ebx = ecx = edx = 0;
+
+ asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
+ "=d" (edx) : "a" (0));
+
+ if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
+ if (verbose)
+ fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
+ (char *)&ebx, (char *)&edx, (char *)&ecx);
+ exit(1);
+ }
+
+ asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ stepping = fms & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
+ if (verbose > 1)
+ printf("CPUID %s %d levels family:model:stepping "
+ "0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+ family, model, stepping, family, model, stepping);
+
+ if (!(edx & (1 << 5))) {
+ if (verbose)
+ printf("CPUID: no MSR\n");
+ exit(1);
+ }
+
+ /*
+ * Support for MSR_IA32_ENERGY_PERF_BIAS
+ * is indicated by CPUID.06H.ECX.bit3
+ */
+ asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
+ if (verbose)
+ printf("CPUID.06H.ECX: 0x%x\n", ecx);
+ if (!(ecx & (1 << 3))) {
+ if (verbose)
+ printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
+ exit(1);
+ }
+ return; /* success */
+}
+
+unsigned long long get_msr(int cpu, int offset)
+{
+ unsigned long long msr;
+ char msr_path[32];
+ int retval;
+ int fd;
+
+ sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_path, O_RDONLY);
+ if (fd < 0) {
+ printf("Try \"# modprobe msr\"\n");
+ perror(msr_path);
+ exit(1);
+ }
+
+ retval = pread(fd, &msr, sizeof msr, offset);
+
+ if (retval != sizeof msr) {
+ printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
+ exit(-2);
+ }
+ close(fd);
+ return msr;
+}
+
+unsigned long long put_msr(int cpu, unsigned long long new_msr, int offset)
+{
+ unsigned long long old_msr;
+ char msr_path[32];
+ int retval;
+ int fd;
+
+ sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_path, O_RDWR);
+ if (fd < 0) {
+ perror(msr_path);
+ exit(1);
+ }
+
+ retval = pread(fd, &old_msr, sizeof old_msr, offset);
+ if (retval != sizeof old_msr) {
+ perror("pwrite");
+ printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
+ exit(-2);
+ }
+
+ retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
+ if (retval != sizeof new_msr) {
+ perror("pwrite");
+ printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
+ exit(-2);
+ }
+
+ close(fd);
+
+ return old_msr;
+}
+
+void print_msr(int cpu)
+{
+ printf("cpu%d: 0x%016llx\n",
+ cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
+}
+
+void update_msr(int cpu)
+{
+ unsigned long long previous_msr;
+
+ previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
+
+ if (verbose)
+ printf("cpu%d msr0x%x 0x%016llx -> 0x%016llx\n",
+ cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
+
+ return;
+}
+
+char *proc_stat = "/proc/stat";
+/*
+ * run func() on every cpu in /dev/cpu
+ */
+void for_every_cpu(void (func)(int))
+{
+ FILE *fp;
+ int retval;
+
+ fp = fopen(proc_stat, "r");
+ if (fp == NULL) {
+ perror(proc_stat);
+ exit(1);
+ }
+
+ retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+ if (retval != 0) {
+ perror("/proc/stat format");
+ exit(1);
+ }
+
+ while (1) {
+ int cpu;
+
+ retval = fscanf(fp,
+ "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
+ &cpu);
+ if (retval != 1)
+ return;
+
+ func(cpu);
+ }
+ fclose(fp);
+}
+
+int main(int argc, char **argv)
+{
+ cmdline(argc, argv);
+
+ if (verbose > 1)
+ printf("x86_energy_perf_policy Nov 24, 2010"
+ " - Len Brown <lenb@kernel.org>\n");
+ if (verbose > 1 && !read_only)
+ printf("new_bias %lld\n", new_bias);
+
+ validate_cpuid();
+
+ if (cpu != -1) {
+ if (read_only)
+ print_msr(cpu);
+ else
+ update_msr(cpu);
+ } else {
+ if (read_only)
+ for_every_cpu(print_msr);
+ else
+ for_every_cpu(update_msr);
+ }
+
+ return 0;
+}
diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c
new file mode 100644
index 00000000000..516551c9f17
--- /dev/null
+++ b/tools/slub/slabinfo.c
@@ -0,0 +1,1364 @@
+/*
+ * Slabinfo: Tool to get reports about slabs
+ *
+ * (C) 2007 sgi, Christoph Lameter
+ *
+ * Compile by:
+ *
+ * gcc -o slabinfo slabinfo.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <regex.h>
+#include <errno.h>
+
+#define MAX_SLABS 500
+#define MAX_ALIASES 500
+#define MAX_NODES 1024
+
+struct slabinfo {
+ char *name;
+ int alias;
+ int refs;
+ int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+ int hwcache_align, object_size, objs_per_slab;
+ int sanity_checks, slab_size, store_user, trace;
+ int order, poison, reclaim_account, red_zone;
+ unsigned long partial, objects, slabs, objects_partial, objects_total;
+ unsigned long alloc_fastpath, alloc_slowpath;
+ unsigned long free_fastpath, free_slowpath;
+ unsigned long free_frozen, free_add_partial, free_remove_partial;
+ unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+ unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+ unsigned long deactivate_to_head, deactivate_to_tail;
+ unsigned long deactivate_remote_frees, order_fallback;
+ int numa[MAX_NODES];
+ int numa_partial[MAX_NODES];
+} slabinfo[MAX_SLABS];
+
+struct aliasinfo {
+ char *name;
+ char *ref;
+ struct slabinfo *slab;
+} aliasinfo[MAX_ALIASES];
+
+int slabs = 0;
+int actual_slabs = 0;
+int aliases = 0;
+int alias_targets = 0;
+int highest_node = 0;
+
+char buffer[4096];
+
+int show_empty = 0;
+int show_report = 0;
+int show_alias = 0;
+int show_slab = 0;
+int skip_zero = 1;
+int show_numa = 0;
+int show_track = 0;
+int show_first_alias = 0;
+int validate = 0;
+int shrink = 0;
+int show_inverted = 0;
+int show_single_ref = 0;
+int show_totals = 0;
+int sort_size = 0;
+int sort_active = 0;
+int set_debug = 0;
+int show_ops = 0;
+int show_activity = 0;
+
+/* Debug options */
+int sanity = 0;
+int redzone = 0;
+int poison = 0;
+int tracking = 0;
+int tracing = 0;
+
+int page_size;
+
+regex_t pattern;
+
+static void fatal(const char *x, ...)
+{
+ va_list ap;
+
+ va_start(ap, x);
+ vfprintf(stderr, x, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void usage(void)
+{
+ printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
+ "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
+ "-a|--aliases Show aliases\n"
+ "-A|--activity Most active slabs first\n"
+ "-d<options>|--debug=<options> Set/Clear Debug options\n"
+ "-D|--display-active Switch line format to activity\n"
+ "-e|--empty Show empty slabs\n"
+ "-f|--first-alias Show first alias\n"
+ "-h|--help Show usage information\n"
+ "-i|--inverted Inverted list\n"
+ "-l|--slabs Show slabs\n"
+ "-n|--numa Show NUMA information\n"
+ "-o|--ops Show kmem_cache_ops\n"
+ "-s|--shrink Shrink slabs\n"
+ "-r|--report Detailed report on single slabs\n"
+ "-S|--Size Sort by size\n"
+ "-t|--tracking Show alloc/free information\n"
+ "-T|--Totals Show summary information\n"
+ "-v|--validate Validate slabs\n"
+ "-z|--zero Include empty slabs\n"
+ "-1|--1ref Single reference\n"
+ "\nValid debug options (FZPUT may be combined)\n"
+ "a / A Switch on all debug options (=FZUP)\n"
+ "- Switch off all debug options\n"
+ "f / F Sanity Checks (SLAB_DEBUG_FREE)\n"
+ "z / Z Redzoning\n"
+ "p / P Poisoning\n"
+ "u / U Tracking\n"
+ "t / T Tracing\n"
+ );
+}
+
+static unsigned long read_obj(const char *name)
+{
+ FILE *f = fopen(name, "r");
+
+ if (!f)
+ buffer[0] = 0;
+ else {
+ if (!fgets(buffer, sizeof(buffer), f))
+ buffer[0] = 0;
+ fclose(f);
+ if (buffer[strlen(buffer)] == '\n')
+ buffer[strlen(buffer)] = 0;
+ }
+ return strlen(buffer);
+}
+
+
+/*
+ * Get the contents of an attribute
+ */
+static unsigned long get_obj(const char *name)
+{
+ if (!read_obj(name))
+ return 0;
+
+ return atol(buffer);
+}
+
+static unsigned long get_obj_and_str(const char *name, char **x)
+{
+ unsigned long result = 0;
+ char *p;
+
+ *x = NULL;
+
+ if (!read_obj(name)) {
+ x = NULL;
+ return 0;
+ }
+ result = strtoul(buffer, &p, 10);
+ while (*p == ' ')
+ p++;
+ if (*p)
+ *x = strdup(p);
+ return result;
+}
+
+static void set_obj(struct slabinfo *s, const char *name, int n)
+{
+ char x[100];
+ FILE *f;
+
+ snprintf(x, 100, "%s/%s", s->name, name);
+ f = fopen(x, "w");
+ if (!f)
+ fatal("Cannot write to %s\n", x);
+
+ fprintf(f, "%d\n", n);
+ fclose(f);
+}
+
+static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+{
+ char x[100];
+ FILE *f;
+ size_t l;
+
+ snprintf(x, 100, "%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
+
+
+/*
+ * Put a size string together
+ */
+static int store_size(char *buffer, unsigned long value)
+{
+ unsigned long divisor = 1;
+ char trailer = 0;
+ int n;
+
+ if (value > 1000000000UL) {
+ divisor = 100000000UL;
+ trailer = 'G';
+ } else if (value > 1000000UL) {
+ divisor = 100000UL;
+ trailer = 'M';
+ } else if (value > 1000UL) {
+ divisor = 100;
+ trailer = 'K';
+ }
+
+ value /= divisor;
+ n = sprintf(buffer, "%ld",value);
+ if (trailer) {
+ buffer[n] = trailer;
+ n++;
+ buffer[n] = 0;
+ }
+ if (divisor != 1) {
+ memmove(buffer + n - 2, buffer + n - 3, 4);
+ buffer[n-2] = '.';
+ n++;
+ }
+ return n;
+}
+
+static void decode_numa_list(int *numa, char *t)
+{
+ int node;
+ int nr;
+
+ memset(numa, 0, MAX_NODES * sizeof(int));
+
+ if (!t)
+ return;
+
+ while (*t == 'N') {
+ t++;
+ node = strtoul(t, &t, 10);
+ if (*t == '=') {
+ t++;
+ nr = strtoul(t, &t, 10);
+ numa[node] = nr;
+ if (node > highest_node)
+ highest_node = node;
+ }
+ while (*t == ' ')
+ t++;
+ }
+}
+
+static void slab_validate(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ set_obj(s, "validate", 1);
+}
+
+static void slab_shrink(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ set_obj(s, "shrink", 1);
+}
+
+int line = 0;
+
+static void first_line(void)
+{
+ if (show_activity)
+ printf("Name Objects Alloc Free %%Fast Fallb O\n");
+ else
+ printf("Name Objects Objsize Space "
+ "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
+}
+
+/*
+ * Find the shortest alias of a slab
+ */
+static struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+ struct aliasinfo *a;
+ struct aliasinfo *best = NULL;
+
+ for(a = aliasinfo;a < aliasinfo + aliases; a++) {
+ if (a->slab == find &&
+ (!best || strlen(best->name) < strlen(a->name))) {
+ best = a;
+ if (strncmp(a->name,"kmall", 5) == 0)
+ return best;
+ }
+ }
+ return best;
+}
+
+static unsigned long slab_size(struct slabinfo *s)
+{
+ return s->slabs * (page_size << s->order);
+}
+
+static unsigned long slab_activity(struct slabinfo *s)
+{
+ return s->alloc_fastpath + s->free_fastpath +
+ s->alloc_slowpath + s->free_slowpath;
+}
+
+static void slab_numa(struct slabinfo *s, int mode)
+{
+ int node;
+
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (!highest_node) {
+ printf("\n%s: No NUMA information available.\n", s->name);
+ return;
+ }
+
+ if (skip_zero && !s->slabs)
+ return;
+
+ if (!line) {
+ printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+ for(node = 0; node <= highest_node; node++)
+ printf(" %4d", node);
+ printf("\n----------------------");
+ for(node = 0; node <= highest_node; node++)
+ printf("-----");
+ printf("\n");
+ }
+ printf("%-21s ", mode ? "All slabs" : s->name);
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ if (mode) {
+ printf("%-21s ", "Partial slabs");
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa_partial[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ }
+ line++;
+}
+
+static void show_tracking(struct slabinfo *s)
+{
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "alloc_calls"))
+ printf(buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "free_calls"))
+ printf(buffer);
+ else
+ printf("No Data\n");
+
+}
+
+static void ops(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (read_slab_obj(s, "ops")) {
+ printf("\n%s: kmem_cache operations\n", s->name);
+ printf("--------------------------------------------\n");
+ printf(buffer);
+ } else
+ printf("\n%s has no kmem_cache operations\n", s->name);
+}
+
+static const char *onoff(int x)
+{
+ if (x)
+ return "On ";
+ return "Off";
+}
+
+static void slab_stats(struct slabinfo *s)
+{
+ unsigned long total_alloc;
+ unsigned long total_free;
+ unsigned long total;
+
+ if (!s->alloc_slab)
+ return;
+
+ total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+ total_free = s->free_fastpath + s->free_slowpath;
+
+ if (!total_alloc)
+ return;
+
+ printf("\n");
+ printf("Slab Perf Counter Alloc Free %%Al %%Fr\n");
+ printf("--------------------------------------------------\n");
+ printf("Fastpath %8lu %8lu %3lu %3lu\n",
+ s->alloc_fastpath, s->free_fastpath,
+ s->alloc_fastpath * 100 / total_alloc,
+ s->free_fastpath * 100 / total_free);
+ printf("Slowpath %8lu %8lu %3lu %3lu\n",
+ total_alloc - s->alloc_fastpath, s->free_slowpath,
+ (total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+ s->free_slowpath * 100 / total_free);
+ printf("Page Alloc %8lu %8lu %3lu %3lu\n",
+ s->alloc_slab, s->free_slab,
+ s->alloc_slab * 100 / total_alloc,
+ s->free_slab * 100 / total_free);
+ printf("Add partial %8lu %8lu %3lu %3lu\n",
+ s->deactivate_to_head + s->deactivate_to_tail,
+ s->free_add_partial,
+ (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+ s->free_add_partial * 100 / total_free);
+ printf("Remove partial %8lu %8lu %3lu %3lu\n",
+ s->alloc_from_partial, s->free_remove_partial,
+ s->alloc_from_partial * 100 / total_alloc,
+ s->free_remove_partial * 100 / total_free);
+
+ printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+ s->deactivate_remote_frees, s->free_frozen,
+ s->deactivate_remote_frees * 100 / total_alloc,
+ s->free_frozen * 100 / total_free);
+
+ printf("Total %8lu %8lu\n\n", total_alloc, total_free);
+
+ if (s->cpuslab_flush)
+ printf("Flushes %8lu\n", s->cpuslab_flush);
+
+ if (s->alloc_refill)
+ printf("Refill %8lu\n", s->alloc_refill);
+
+ total = s->deactivate_full + s->deactivate_empty +
+ s->deactivate_to_head + s->deactivate_to_tail;
+
+ if (total)
+ printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+ "ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+ s->deactivate_full, (s->deactivate_full * 100) / total,
+ s->deactivate_empty, (s->deactivate_empty * 100) / total,
+ s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
+ s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+}
+
+static void report(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n",
+ s->name, s->aliases, s->order, s->objects);
+ if (s->hwcache_align)
+ printf("** Hardware cacheline aligned\n");
+ if (s->cache_dma)
+ printf("** Memory is allocated in a special DMA zone\n");
+ if (s->destroy_by_rcu)
+ printf("** Slabs are destroyed via RCU\n");
+ if (s->reclaim_account)
+ printf("** Reclaim accounting active\n");
+
+ printf("\nSizes (bytes) Slabs Debug Memory\n");
+ printf("------------------------------------------------------------------------\n");
+ printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n",
+ s->object_size, s->slabs, onoff(s->sanity_checks),
+ s->slabs * (page_size << s->order));
+ printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n",
+ s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+ onoff(s->red_zone), s->objects * s->object_size);
+ printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n",
+ page_size << s->order, s->partial, onoff(s->poison),
+ s->slabs * (page_size << s->order) - s->objects * s->object_size);
+ printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n",
+ s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+ (s->slab_size - s->object_size) * s->objects);
+ printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n",
+ s->align, s->objs_per_slab, onoff(s->trace),
+ ((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+ s->slabs);
+
+ ops(s);
+ show_tracking(s);
+ slab_numa(s, 1);
+ slab_stats(s);
+}
+
+static void slabcache(struct slabinfo *s)
+{
+ char size_str[20];
+ char dist_str[40];
+ char flags[20];
+ char *p = flags;
+
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (actual_slabs == 1) {
+ report(s);
+ return;
+ }
+
+ if (skip_zero && !show_empty && !s->slabs)
+ return;
+
+ if (show_empty && s->slabs)
+ return;
+
+ store_size(size_str, slab_size(s));
+ snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
+ s->partial, s->cpu_slabs);
+
+ if (!line++)
+ first_line();
+
+ if (s->aliases)
+ *p++ = '*';
+ if (s->cache_dma)
+ *p++ = 'd';
+ if (s->hwcache_align)
+ *p++ = 'A';
+ if (s->poison)
+ *p++ = 'P';
+ if (s->reclaim_account)
+ *p++ = 'a';
+ if (s->red_zone)
+ *p++ = 'Z';
+ if (s->sanity_checks)
+ *p++ = 'F';
+ if (s->store_user)
+ *p++ = 'U';
+ if (s->trace)
+ *p++ = 'T';
+
+ *p = 0;
+ if (show_activity) {
+ unsigned long total_alloc;
+ unsigned long total_free;
+
+ total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+ total_free = s->free_fastpath + s->free_slowpath;
+
+ printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n",
+ s->name, s->objects,
+ total_alloc, total_free,
+ total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+ total_free ? (s->free_fastpath * 100 / total_free) : 0,
+ s->order_fallback, s->order);
+ }
+ else
+ printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+ s->name, s->objects, s->object_size, size_str, dist_str,
+ s->objs_per_slab, s->order,
+ s->slabs ? (s->partial * 100) / s->slabs : 100,
+ s->slabs ? (s->objects * s->object_size * 100) /
+ (s->slabs * (page_size << s->order)) : 100,
+ flags);
+}
+
+/*
+ * Analyze debug options. Return false if something is amiss.
+ */
+static int debug_opt_scan(char *opt)
+{
+ if (!opt || !opt[0] || strcmp(opt, "-") == 0)
+ return 1;
+
+ if (strcasecmp(opt, "a") == 0) {
+ sanity = 1;
+ poison = 1;
+ redzone = 1;
+ tracking = 1;
+ return 1;
+ }
+
+ for ( ; *opt; opt++)
+ switch (*opt) {
+ case 'F' : case 'f':
+ if (sanity)
+ return 0;
+ sanity = 1;
+ break;
+ case 'P' : case 'p':
+ if (poison)
+ return 0;
+ poison = 1;
+ break;
+
+ case 'Z' : case 'z':
+ if (redzone)
+ return 0;
+ redzone = 1;
+ break;
+
+ case 'U' : case 'u':
+ if (tracking)
+ return 0;
+ tracking = 1;
+ break;
+
+ case 'T' : case 't':
+ if (tracing)
+ return 0;
+ tracing = 1;
+ break;
+ default:
+ return 0;
+ }
+ return 1;
+}
+
+static int slab_empty(struct slabinfo *s)
+{
+ if (s->objects > 0)
+ return 0;
+
+ /*
+ * We may still have slabs even if there are no objects. Shrinking will
+ * remove them.
+ */
+ if (s->slabs != 0)
+ set_obj(s, "shrink", 1);
+
+ return 1;
+}
+
+static void slab_debug(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (sanity && !s->sanity_checks) {
+ set_obj(s, "sanity", 1);
+ }
+ if (!sanity && s->sanity_checks) {
+ if (slab_empty(s))
+ set_obj(s, "sanity", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+ }
+ if (redzone && !s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+ }
+ if (!redzone && s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+ }
+ if (poison && !s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+ }
+ if (!poison && s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+ }
+ if (tracking && !s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+ }
+ if (!tracking && s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+ }
+ if (tracing && !s->trace) {
+ if (slabs == 1)
+ set_obj(s, "trace", 1);
+ else
+ fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+ }
+ if (!tracing && s->trace)
+ set_obj(s, "trace", 1);
+}
+
+static void totals(void)
+{
+ struct slabinfo *s;
+
+ int used_slabs = 0;
+ char b1[20], b2[20], b3[20], b4[20];
+ unsigned long long max = 1ULL << 63;
+
+ /* Object size */
+ unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
+
+ /* Number of partial slabs in a slabcache */
+ unsigned long long min_partial = max, max_partial = 0,
+ avg_partial, total_partial = 0;
+
+ /* Number of slabs in a slab cache */
+ unsigned long long min_slabs = max, max_slabs = 0,
+ avg_slabs, total_slabs = 0;
+
+ /* Size of the whole slab */
+ unsigned long long min_size = max, max_size = 0,
+ avg_size, total_size = 0;
+
+ /* Bytes used for object storage in a slab */
+ unsigned long long min_used = max, max_used = 0,
+ avg_used, total_used = 0;
+
+ /* Waste: Bytes used for alignment and padding */
+ unsigned long long min_waste = max, max_waste = 0,
+ avg_waste, total_waste = 0;
+ /* Number of objects in a slab */
+ unsigned long long min_objects = max, max_objects = 0,
+ avg_objects, total_objects = 0;
+ /* Waste per object */
+ unsigned long long min_objwaste = max,
+ max_objwaste = 0, avg_objwaste,
+ total_objwaste = 0;
+
+ /* Memory per object */
+ unsigned long long min_memobj = max,
+ max_memobj = 0, avg_memobj,
+ total_objsize = 0;
+
+ /* Percentage of partial slabs per slab */
+ unsigned long min_ppart = 100, max_ppart = 0,
+ avg_ppart, total_ppart = 0;
+
+ /* Number of objects in partial slabs */
+ unsigned long min_partobj = max, max_partobj = 0,
+ avg_partobj, total_partobj = 0;
+
+ /* Percentage of partial objects of all objects in a slab */
+ unsigned long min_ppartobj = 100, max_ppartobj = 0,
+ avg_ppartobj, total_ppartobj = 0;
+
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ unsigned long long size;
+ unsigned long used;
+ unsigned long long wasted;
+ unsigned long long objwaste;
+ unsigned long percentage_partial_slabs;
+ unsigned long percentage_partial_objs;
+
+ if (!s->slabs || !s->objects)
+ continue;
+
+ used_slabs++;
+
+ size = slab_size(s);
+ used = s->objects * s->object_size;
+ wasted = size - used;
+ objwaste = s->slab_size - s->object_size;
+
+ percentage_partial_slabs = s->partial * 100 / s->slabs;
+ if (percentage_partial_slabs > 100)
+ percentage_partial_slabs = 100;
+
+ percentage_partial_objs = s->objects_partial * 100
+ / s->objects;
+
+ if (percentage_partial_objs > 100)
+ percentage_partial_objs = 100;
+
+ if (s->object_size < min_objsize)
+ min_objsize = s->object_size;
+ if (s->partial < min_partial)
+ min_partial = s->partial;
+ if (s->slabs < min_slabs)
+ min_slabs = s->slabs;
+ if (size < min_size)
+ min_size = size;
+ if (wasted < min_waste)
+ min_waste = wasted;
+ if (objwaste < min_objwaste)
+ min_objwaste = objwaste;
+ if (s->objects < min_objects)
+ min_objects = s->objects;
+ if (used < min_used)
+ min_used = used;
+ if (s->objects_partial < min_partobj)
+ min_partobj = s->objects_partial;
+ if (percentage_partial_slabs < min_ppart)
+ min_ppart = percentage_partial_slabs;
+ if (percentage_partial_objs < min_ppartobj)
+ min_ppartobj = percentage_partial_objs;
+ if (s->slab_size < min_memobj)
+ min_memobj = s->slab_size;
+
+ if (s->object_size > max_objsize)
+ max_objsize = s->object_size;
+ if (s->partial > max_partial)
+ max_partial = s->partial;
+ if (s->slabs > max_slabs)
+ max_slabs = s->slabs;
+ if (size > max_size)
+ max_size = size;
+ if (wasted > max_waste)
+ max_waste = wasted;
+ if (objwaste > max_objwaste)
+ max_objwaste = objwaste;
+ if (s->objects > max_objects)
+ max_objects = s->objects;
+ if (used > max_used)
+ max_used = used;
+ if (s->objects_partial > max_partobj)
+ max_partobj = s->objects_partial;
+ if (percentage_partial_slabs > max_ppart)
+ max_ppart = percentage_partial_slabs;
+ if (percentage_partial_objs > max_ppartobj)
+ max_ppartobj = percentage_partial_objs;
+ if (s->slab_size > max_memobj)
+ max_memobj = s->slab_size;
+
+ total_partial += s->partial;
+ total_slabs += s->slabs;
+ total_size += size;
+ total_waste += wasted;
+
+ total_objects += s->objects;
+ total_used += used;
+ total_partobj += s->objects_partial;
+ total_ppart += percentage_partial_slabs;
+ total_ppartobj += percentage_partial_objs;
+
+ total_objwaste += s->objects * objwaste;
+ total_objsize += s->objects * s->slab_size;
+ }
+
+ if (!total_objects) {
+ printf("No objects\n");
+ return;
+ }
+ if (!used_slabs) {
+ printf("No slabs\n");
+ return;
+ }
+
+ /* Per slab averages */
+ avg_partial = total_partial / used_slabs;
+ avg_slabs = total_slabs / used_slabs;
+ avg_size = total_size / used_slabs;
+ avg_waste = total_waste / used_slabs;
+
+ avg_objects = total_objects / used_slabs;
+ avg_used = total_used / used_slabs;
+ avg_partobj = total_partobj / used_slabs;
+ avg_ppart = total_ppart / used_slabs;
+ avg_ppartobj = total_ppartobj / used_slabs;
+
+ /* Per object object sizes */
+ avg_objsize = total_used / total_objects;
+ avg_objwaste = total_objwaste / total_objects;
+ avg_partobj = total_partobj * 100 / total_objects;
+ avg_memobj = total_objsize / total_objects;
+
+ printf("Slabcache Totals\n");
+ printf("----------------\n");
+ printf("Slabcaches : %3d Aliases : %3d->%-3d Active: %3d\n",
+ slabs, aliases, alias_targets, used_slabs);
+
+ store_size(b1, total_size);store_size(b2, total_waste);
+ store_size(b3, total_waste * 100 / total_used);
+ printf("Memory used: %6s # Loss : %6s MRatio:%6s%%\n", b1, b2, b3);
+
+ store_size(b1, total_objects);store_size(b2, total_partobj);
+ store_size(b3, total_partobj * 100 / total_objects);
+ printf("# Objects : %6s # PartObj: %6s ORatio:%6s%%\n", b1, b2, b3);
+
+ printf("\n");
+ printf("Per Cache Average Min Max Total\n");
+ printf("---------------------------------------------------------\n");
+
+ store_size(b1, avg_objects);store_size(b2, min_objects);
+ store_size(b3, max_objects);store_size(b4, total_objects);
+ printf("#Objects %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_slabs);store_size(b2, min_slabs);
+ store_size(b3, max_slabs);store_size(b4, total_slabs);
+ printf("#Slabs %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_partial);store_size(b2, min_partial);
+ store_size(b3, max_partial);store_size(b4, total_partial);
+ printf("#PartSlab %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+ store_size(b1, avg_ppart);store_size(b2, min_ppart);
+ store_size(b3, max_ppart);
+ store_size(b4, total_partial * 100 / total_slabs);
+ printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_partobj);store_size(b2, min_partobj);
+ store_size(b3, max_partobj);
+ store_size(b4, total_partobj);
+ printf("PartObjs %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj);
+ store_size(b3, max_ppartobj);
+ store_size(b4, total_partobj * 100 / total_objects);
+ printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_size);store_size(b2, min_size);
+ store_size(b3, max_size);store_size(b4, total_size);
+ printf("Memory %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_used);store_size(b2, min_used);
+ store_size(b3, max_used);store_size(b4, total_used);
+ printf("Used %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_waste);store_size(b2, min_waste);
+ store_size(b3, max_waste);store_size(b4, total_waste);
+ printf("Loss %10s %10s %10s %10s\n",
+ b1, b2, b3, b4);
+
+ printf("\n");
+ printf("Per Object Average Min Max\n");
+ printf("---------------------------------------------\n");
+
+ store_size(b1, avg_memobj);store_size(b2, min_memobj);
+ store_size(b3, max_memobj);
+ printf("Memory %10s %10s %10s\n",
+ b1, b2, b3);
+ store_size(b1, avg_objsize);store_size(b2, min_objsize);
+ store_size(b3, max_objsize);
+ printf("User %10s %10s %10s\n",
+ b1, b2, b3);
+
+ store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
+ store_size(b3, max_objwaste);
+ printf("Loss %10s %10s %10s\n",
+ b1, b2, b3);
+}
+
+static void sort_slabs(void)
+{
+ struct slabinfo *s1,*s2;
+
+ for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+ for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+ int result;
+
+ if (sort_size)
+ result = slab_size(s1) < slab_size(s2);
+ else if (sort_active)
+ result = slab_activity(s1) < slab_activity(s2);
+ else
+ result = strcasecmp(s1->name, s2->name);
+
+ if (show_inverted)
+ result = -result;
+
+ if (result > 0) {
+ struct slabinfo t;
+
+ memcpy(&t, s1, sizeof(struct slabinfo));
+ memcpy(s1, s2, sizeof(struct slabinfo));
+ memcpy(s2, &t, sizeof(struct slabinfo));
+ }
+ }
+ }
+}
+
+static void sort_aliases(void)
+{
+ struct aliasinfo *a1,*a2;
+
+ for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
+ for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
+ char *n1, *n2;
+
+ n1 = a1->name;
+ n2 = a2->name;
+ if (show_alias && !show_inverted) {
+ n1 = a1->ref;
+ n2 = a2->ref;
+ }
+ if (strcasecmp(n1, n2) > 0) {
+ struct aliasinfo t;
+
+ memcpy(&t, a1, sizeof(struct aliasinfo));
+ memcpy(a1, a2, sizeof(struct aliasinfo));
+ memcpy(a2, &t, sizeof(struct aliasinfo));
+ }
+ }
+ }
+}
+
+static void link_slabs(void)
+{
+ struct aliasinfo *a;
+ struct slabinfo *s;
+
+ for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ for (s = slabinfo; s < slabinfo + slabs; s++)
+ if (strcmp(a->ref, s->name) == 0) {
+ a->slab = s;
+ s->refs++;
+ break;
+ }
+ if (s == slabinfo + slabs)
+ fatal("Unresolved alias %s\n", a->ref);
+ }
+}
+
+static void alias(void)
+{
+ struct aliasinfo *a;
+ char *active = NULL;
+
+ sort_aliases();
+ link_slabs();
+
+ for(a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ if (!show_single_ref && a->slab->refs == 1)
+ continue;
+
+ if (!show_inverted) {
+ if (active) {
+ if (strcmp(a->slab->name, active) == 0) {
+ printf(" %s", a->name);
+ continue;
+ }
+ }
+ printf("\n%-12s <- %s", a->slab->name, a->name);
+ active = a->slab->name;
+ }
+ else
+ printf("%-20s -> %s\n", a->name, a->slab->name);
+ }
+ if (active)
+ printf("\n");
+}
+
+
+static void rename_slabs(void)
+{
+ struct slabinfo *s;
+ struct aliasinfo *a;
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ if (*s->name != ':')
+ continue;
+
+ if (s->refs > 1 && !show_first_alias)
+ continue;
+
+ a = find_one_alias(s);
+
+ if (a)
+ s->name = a->name;
+ else {
+ s->name = "*";
+ actual_slabs--;
+ }
+ }
+}
+
+static int slab_mismatch(char *slab)
+{
+ return regexec(&pattern, slab, 0, NULL, 0);
+}
+
+static void read_slab_dir(void)
+{
+ DIR *dir;
+ struct dirent *de;
+ struct slabinfo *slab = slabinfo;
+ struct aliasinfo *alias = aliasinfo;
+ char *p;
+ char *t;
+ int count;
+
+ if (chdir("/sys/kernel/slab") && chdir("/sys/slab"))
+ fatal("SYSFS support for SLUB not active\n");
+
+ dir = opendir(".");
+ while ((de = readdir(dir))) {
+ if (de->d_name[0] == '.' ||
+ (de->d_name[0] != ':' && slab_mismatch(de->d_name)))
+ continue;
+ switch (de->d_type) {
+ case DT_LNK:
+ alias->name = strdup(de->d_name);
+ count = readlink(de->d_name, buffer, sizeof(buffer));
+
+ if (count < 0)
+ fatal("Cannot read symlink %s\n", de->d_name);
+
+ buffer[count] = 0;
+ p = buffer + count;
+ while (p > buffer && p[-1] != '/')
+ p--;
+ alias->ref = strdup(p);
+ alias++;
+ break;
+ case DT_DIR:
+ if (chdir(de->d_name))
+ fatal("Unable to access slab %s\n", slab->name);
+ slab->name = strdup(de->d_name);
+ slab->alias = 0;
+ slab->refs = 0;
+ slab->aliases = get_obj("aliases");
+ slab->align = get_obj("align");
+ slab->cache_dma = get_obj("cache_dma");
+ slab->cpu_slabs = get_obj("cpu_slabs");
+ slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+ slab->hwcache_align = get_obj("hwcache_align");
+ slab->object_size = get_obj("object_size");
+ slab->objects = get_obj("objects");
+ slab->objects_partial = get_obj("objects_partial");
+ slab->objects_total = get_obj("objects_total");
+ slab->objs_per_slab = get_obj("objs_per_slab");
+ slab->order = get_obj("order");
+ slab->partial = get_obj("partial");
+ slab->partial = get_obj_and_str("partial", &t);
+ decode_numa_list(slab->numa_partial, t);
+ free(t);
+ slab->poison = get_obj("poison");
+ slab->reclaim_account = get_obj("reclaim_account");
+ slab->red_zone = get_obj("red_zone");
+ slab->sanity_checks = get_obj("sanity_checks");
+ slab->slab_size = get_obj("slab_size");
+ slab->slabs = get_obj_and_str("slabs", &t);
+ decode_numa_list(slab->numa, t);
+ free(t);
+ slab->store_user = get_obj("store_user");
+ slab->trace = get_obj("trace");
+ slab->alloc_fastpath = get_obj("alloc_fastpath");
+ slab->alloc_slowpath = get_obj("alloc_slowpath");
+ slab->free_fastpath = get_obj("free_fastpath");
+ slab->free_slowpath = get_obj("free_slowpath");
+ slab->free_frozen= get_obj("free_frozen");
+ slab->free_add_partial = get_obj("free_add_partial");
+ slab->free_remove_partial = get_obj("free_remove_partial");
+ slab->alloc_from_partial = get_obj("alloc_from_partial");
+ slab->alloc_slab = get_obj("alloc_slab");
+ slab->alloc_refill = get_obj("alloc_refill");
+ slab->free_slab = get_obj("free_slab");
+ slab->cpuslab_flush = get_obj("cpuslab_flush");
+ slab->deactivate_full = get_obj("deactivate_full");
+ slab->deactivate_empty = get_obj("deactivate_empty");
+ slab->deactivate_to_head = get_obj("deactivate_to_head");
+ slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+ slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
+ slab->order_fallback = get_obj("order_fallback");
+ chdir("..");
+ if (slab->name[0] == ':')
+ alias_targets++;
+ slab++;
+ break;
+ default :
+ fatal("Unknown file type %lx\n", de->d_type);
+ }
+ }
+ closedir(dir);
+ slabs = slab - slabinfo;
+ actual_slabs = slabs;
+ aliases = alias - aliasinfo;
+ if (slabs > MAX_SLABS)
+ fatal("Too many slabs\n");
+ if (aliases > MAX_ALIASES)
+ fatal("Too many aliases\n");
+}
+
+static void output_slabs(void)
+{
+ struct slabinfo *slab;
+
+ for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
+
+ if (slab->alias)
+ continue;
+
+
+ if (show_numa)
+ slab_numa(slab, 0);
+ else if (show_track)
+ show_tracking(slab);
+ else if (validate)
+ slab_validate(slab);
+ else if (shrink)
+ slab_shrink(slab);
+ else if (set_debug)
+ slab_debug(slab);
+ else if (show_ops)
+ ops(slab);
+ else if (show_slab)
+ slabcache(slab);
+ else if (show_report)
+ report(slab);
+ }
+}
+
+struct option opts[] = {
+ { "aliases", 0, NULL, 'a' },
+ { "activity", 0, NULL, 'A' },
+ { "debug", 2, NULL, 'd' },
+ { "display-activity", 0, NULL, 'D' },
+ { "empty", 0, NULL, 'e' },
+ { "first-alias", 0, NULL, 'f' },
+ { "help", 0, NULL, 'h' },
+ { "inverted", 0, NULL, 'i'},
+ { "numa", 0, NULL, 'n' },
+ { "ops", 0, NULL, 'o' },
+ { "report", 0, NULL, 'r' },
+ { "shrink", 0, NULL, 's' },
+ { "slabs", 0, NULL, 'l' },
+ { "track", 0, NULL, 't'},
+ { "validate", 0, NULL, 'v' },
+ { "zero", 0, NULL, 'z' },
+ { "1ref", 0, NULL, '1'},
+ { NULL, 0, NULL, 0 }
+};
+
+int main(int argc, char *argv[])
+{
+ int c;
+ int err;
+ char *pattern_source;
+
+ page_size = getpagesize();
+
+ while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
+ opts, NULL)) != -1)
+ switch (c) {
+ case '1':
+ show_single_ref = 1;
+ break;
+ case 'a':
+ show_alias = 1;
+ break;
+ case 'A':
+ sort_active = 1;
+ break;
+ case 'd':
+ set_debug = 1;
+ if (!debug_opt_scan(optarg))
+ fatal("Invalid debug option '%s'\n", optarg);
+ break;
+ case 'D':
+ show_activity = 1;
+ break;
+ case 'e':
+ show_empty = 1;
+ break;
+ case 'f':
+ show_first_alias = 1;
+ break;
+ case 'h':
+ usage();
+ return 0;
+ case 'i':
+ show_inverted = 1;
+ break;
+ case 'n':
+ show_numa = 1;
+ break;
+ case 'o':
+ show_ops = 1;
+ break;
+ case 'r':
+ show_report = 1;
+ break;
+ case 's':
+ shrink = 1;
+ break;
+ case 'l':
+ show_slab = 1;
+ break;
+ case 't':
+ show_track = 1;
+ break;
+ case 'v':
+ validate = 1;
+ break;
+ case 'z':
+ skip_zero = 0;
+ break;
+ case 'T':
+ show_totals = 1;
+ break;
+ case 'S':
+ sort_size = 1;
+ break;
+
+ default:
+ fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+
+ }
+
+ if (!show_slab && !show_alias && !show_track && !show_report
+ && !validate && !shrink && !set_debug && !show_ops)
+ show_slab = 1;
+
+ if (argc > optind)
+ pattern_source = argv[optind];
+ else
+ pattern_source = ".*";
+
+ err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+ if (err)
+ fatal("%s: Invalid pattern '%s' code %d\n",
+ argv[0], pattern_source, err);
+ read_slab_dir();
+ if (show_alias)
+ alias();
+ else
+ if (show_totals)
+ totals();
+ else {
+ link_slabs();
+ rename_slabs();
+ sort_slabs();
+ output_slabs();
+ }
+ return 0;
+}
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 00000000000..9a571e71683
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+
+open (IN,"ktest.pl");
+while (<IN>) {
+ if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+ /set_test_option\("(.*?)"/) {
+ $opt{$1} = 1;
+ }
+}
+close IN;
+
+open (IN, "sample.conf");
+while (<IN>) {
+ if (/^\s*#?\s*(\S+)\s*=/) {
+ $samp{$1} = 1;
+ }
+}
+close IN;
+
+foreach $opt (keys %opt) {
+ if (!defined($samp{$opt})) {
+ print "opt = $opt\n";
+ }
+}
+
+foreach $samp (keys %samp) {
+ if (!defined($opt{$samp})) {
+ print "samp = $samp\n";
+ }
+}
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 00000000000..e1c62eeb88f
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,2023 @@
+#!/usr/bin/perl -w
+#
+# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+
+my $VERSION = "0.2";
+
+$| = 1;
+
+my %opt;
+my %repeat_tests;
+my %repeats;
+my %default;
+
+#default opts
+$default{"NUM_TESTS"} = 1;
+$default{"REBOOT_TYPE"} = "grub";
+$default{"TEST_TYPE"} = "test";
+$default{"BUILD_TYPE"} = "randconfig";
+$default{"MAKE_CMD"} = "make";
+$default{"TIMEOUT"} = 120;
+$default{"TMP_DIR"} = "/tmp/ktest";
+$default{"SLEEP_TIME"} = 60; # sleep time between tests
+$default{"BUILD_NOCLEAN"} = 0;
+$default{"REBOOT_ON_ERROR"} = 0;
+$default{"POWEROFF_ON_ERROR"} = 0;
+$default{"REBOOT_ON_SUCCESS"} = 1;
+$default{"POWEROFF_ON_SUCCESS"} = 0;
+$default{"BUILD_OPTIONS"} = "";
+$default{"BISECT_SLEEP_TIME"} = 60; # sleep time between bisects
+$default{"CLEAR_LOG"} = 0;
+$default{"SUCCESS_LINE"} = "login:";
+$default{"BOOTED_TIMEOUT"} = 1;
+$default{"DIE_ON_FAILURE"} = 1;
+$default{"SSH_EXEC"} = "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND";
+$default{"SCP_TO_TARGET"} = "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE";
+$default{"REBOOT"} = "ssh \$SSH_USER\@\$MACHINE reboot";
+$default{"STOP_AFTER_SUCCESS"} = 10;
+$default{"STOP_AFTER_FAILURE"} = 60;
+$default{"LOCALVERSION"} = "-test";
+
+my $ktest_config;
+my $version;
+my $machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $poweroff_on_error;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $ssh_exec;
+my $scp_to_target;
+my $power_off;
+my $grub_menu;
+my $grub_number;
+my $target;
+my $make;
+my $post_install;
+my $noclean;
+my $minconfig;
+my $addconfig;
+my $in_bisect = 0;
+my $bisect_bad = "";
+my $reverse_bisect;
+my $in_patchcheck = 0;
+my $run_test;
+my $redirect;
+my $buildlog;
+my $dmesg;
+my $monitor_fp;
+my $monitor_pid;
+my $monitor_cnt = 0;
+my $sleep_time;
+my $bisect_sleep_time;
+my $store_failures;
+my $timeout;
+my $booted_timeout;
+my $console;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $build_target;
+my $target_image;
+my $localversion;
+my $iteration = 0;
+my $successes = 0;
+
+my %entered_configs;
+my %config_help;
+
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+EOF
+ ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh on normal bootup, provide the user
+ (most likely root, since you need privileged operations)
+EOF
+ ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+EOF
+ ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory that the objects will be built (full path).
+ (can not be same as BUILD_DIR)
+EOF
+ ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+ ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+ ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+
+ Here is a digital loggers power switch example
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+
+ Here is an example to reboot a virtual box on the current host
+ with the name "Guest".
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+ ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console
+
+ If you use ttywatch server, something like the following would work.
+CONSOLE = nc -d localhost 3001
+
+ For a virtual machine with guest name "Guest".
+CONSOLE = virsh console Guest
+EOF
+ ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ Required version ending to differentiate the test
+ from other linux builds on the system.
+EOF
+ ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ Way to reboot the box to the test kernel.
+ Only valid options so far are "grub" and "script".
+
+ If you specify grub, it will assume grub version 1
+ and will search in /boot/grub/menu.lst for the title \$GRUB_MENU
+ and select that target to reboot to the kernel. If this is not
+ your setup, then specify "script" and have a command or script
+ specified in REBOOT_SCRIPT to boot to the target.
+
+ The entry in /boot/grub/menu.lst must be entered in manually.
+ The test will not modify that file.
+EOF
+ ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub)
+
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+
+ For example, if in the /boot/grub/menu.lst the test kernel title has:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+EOF
+ ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+ ;
+
+
+sub get_ktest_config {
+ my ($config) = @_;
+
+ return if (defined($opt{$config}));
+
+ if (defined($config_help{$config})) {
+ print "\n";
+ print $config_help{$config};
+ }
+
+ for (;;) {
+ print "$config = ";
+ if (defined($default{$config})) {
+ print "\[$default{$config}\] ";
+ }
+ $entered_configs{$config} = <STDIN>;
+ $entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/;
+ if ($entered_configs{$config} =~ /^\s*$/) {
+ if ($default{$config}) {
+ $entered_configs{$config} = $default{$config};
+ } else {
+ print "Your answer can not be blank\n";
+ next;
+ }
+ }
+ last;
+ }
+}
+
+sub get_ktest_configs {
+ get_ktest_config("MACHINE");
+ get_ktest_config("SSH_USER");
+ get_ktest_config("BUILD_DIR");
+ get_ktest_config("OUTPUT_DIR");
+ get_ktest_config("BUILD_TARGET");
+ get_ktest_config("TARGET_IMAGE");
+ get_ktest_config("POWER_CYCLE");
+ get_ktest_config("CONSOLE");
+ get_ktest_config("LOCALVERSION");
+
+ my $rtype = $opt{"REBOOT_TYPE"};
+
+ if (!defined($rtype)) {
+ if (!defined($opt{"GRUB_MENU"})) {
+ get_ktest_config("REBOOT_TYPE");
+ $rtype = $entered_configs{"REBOOT_TYPE"};
+ } else {
+ $rtype = "grub";
+ }
+ }
+
+ if ($rtype eq "grub") {
+ get_ktest_config("GRUB_MENU");
+ } else {
+ get_ktest_config("REBOOT_SCRIPT");
+ }
+}
+
+sub set_value {
+ my ($lvalue, $rvalue) = @_;
+
+ if (defined($opt{$lvalue})) {
+ die "Error: Option $lvalue defined more than once!\n";
+ }
+ if ($rvalue =~ /^\s*$/) {
+ delete $opt{$lvalue};
+ } else {
+ $opt{$lvalue} = $rvalue;
+ }
+}
+
+sub read_config {
+ my ($config) = @_;
+
+ open(IN, $config) || die "can't read file $config";
+
+ my $name = $config;
+ $name =~ s,.*/(.*),$1,;
+
+ my $test_num = 0;
+ my $default = 1;
+ my $repeat = 1;
+ my $num_tests_set = 0;
+ my $skip = 0;
+ my $rest;
+
+ while (<IN>) {
+
+ # ignore blank lines and comments
+ next if (/^\s*$/ || /\s*\#/);
+
+ if (/^\s*TEST_START(.*)/) {
+
+ $rest = $1;
+
+ if ($num_tests_set) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+
+ my $old_test_num = $test_num;
+ my $old_repeat = $repeat;
+
+ $test_num += $repeat;
+ $default = 0;
+ $repeat = 1;
+
+ if ($rest =~ /\s+SKIP(.*)/) {
+ $rest = $1;
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
+
+ if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) {
+ $repeat = $1;
+ $rest = $2;
+ $repeat_tests{"$test_num"} = $repeat;
+ }
+
+ if ($rest =~ /\s+SKIP(.*)/) {
+ $rest = $1;
+ $skip = 1;
+ }
+
+ if ($rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after TEST_START\n$_";
+ }
+
+ if ($skip) {
+ $test_num = $old_test_num;
+ $repeat = $old_repeat;
+ }
+
+ } elsif (/^\s*DEFAULTS(.*)$/) {
+ $default = 1;
+
+ $rest = $1;
+
+ if ($rest =~ /\s+SKIP(.*)/) {
+ $rest = $1;
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
+
+ if ($rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ }
+
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ if (!$default &&
+ ($lvalue eq "NUM_TESTS" ||
+ $lvalue eq "LOG_FILE" ||
+ $lvalue eq "CLEAR_LOG")) {
+ die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+ }
+
+ if ($lvalue eq "NUM_TESTS") {
+ if ($test_num) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+ if (!$default) {
+ die "$name: $.: NUM_TESTS must be set in default section\n";
+ }
+ $num_tests_set = 1;
+ }
+
+ if ($default || $lvalue =~ /\[\d+\]$/) {
+ set_value($lvalue, $rvalue);
+ } else {
+ my $val = "$lvalue\[$test_num\]";
+ set_value($val, $rvalue);
+
+ if ($repeat > 1) {
+ $repeats{$val} = $repeat;
+ }
+ }
+ } else {
+ die "$name: $.: Garbage found in config\n$_";
+ }
+ }
+
+ close(IN);
+
+ if ($test_num) {
+ $test_num += $repeat - 1;
+ $opt{"NUM_TESTS"} = $test_num;
+ }
+
+ # make sure we have all mandatory configs
+ get_ktest_configs;
+
+ # set any defaults
+
+ foreach my $default (keys %default) {
+ if (!defined($opt{$default})) {
+ $opt{$default} = $default{$default};
+ }
+ }
+}
+
+sub _logit {
+ if (defined($opt{"LOG_FILE"})) {
+ open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ print OUT @_;
+ close(OUT);
+ }
+}
+
+sub logit {
+ if (defined($opt{"LOG_FILE"})) {
+ _logit @_;
+ } else {
+ print @_;
+ }
+}
+
+sub doprint {
+ print @_;
+ _logit @_;
+}
+
+sub run_command;
+
+sub reboot {
+ # try to reboot normally
+ if (run_command $reboot) {
+ if (defined($powercycle_after_reboot)) {
+ sleep $powercycle_after_reboot;
+ run_command "$power_cycle";
+ }
+ } else {
+ # nope? power cycle it.
+ run_command "$power_cycle";
+ }
+}
+
+sub do_not_reboot {
+ my $i = $iteration;
+
+ return $test_type eq "build" ||
+ ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
+ ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+}
+
+sub dodie {
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ my $i = $iteration;
+
+ if ($reboot_on_error && !do_not_reboot) {
+
+ doprint "REBOOTING\n";
+ reboot;
+
+ } elsif ($poweroff_on_error && defined($power_off)) {
+ doprint "POWERING OFF\n";
+ `$power_off`;
+ }
+
+ die @_, "\n";
+}
+
+sub open_console {
+ my ($fp) = @_;
+
+ my $flags;
+
+ my $pid = open($fp, "$console|") or
+ dodie "Can't open console $console";
+
+ $flags = fcntl($fp, F_GETFL, 0) or
+ dodie "Can't get flags for the socket: $!";
+ $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or
+ dodie "Can't set flags for the socket: $!";
+
+ return $pid;
+}
+
+sub close_console {
+ my ($fp, $pid) = @_;
+
+ doprint "kill child process $pid\n";
+ kill 2, $pid;
+
+ print "closing!\n";
+ close($fp);
+}
+
+sub start_monitor {
+ if ($monitor_cnt++) {
+ return;
+ }
+ $monitor_fp = \*MONFD;
+ $monitor_pid = open_console $monitor_fp;
+
+ return;
+
+ open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+
+sub end_monitor {
+ if (--$monitor_cnt) {
+ return;
+ }
+ close_console($monitor_fp, $monitor_pid);
+}
+
+sub wait_for_monitor {
+ my ($time) = @_;
+ my $line;
+
+ doprint "** Wait for monitor to settle down **\n";
+
+ # read the monitor and wait for the system to calm down
+ do {
+ $line = wait_for_input($monitor_fp, $time);
+ print "$line" if (defined($line));
+ } while (defined($line));
+ print "** Monitor flushed **\n";
+}
+
+sub fail {
+
+ if ($die_on_failure) {
+ dodie @_;
+ }
+
+ doprint "FAILED\n";
+
+ my $i = $iteration;
+
+ # no need to reboot for just building.
+ if (!do_not_reboot) {
+ doprint "REBOOTING\n";
+ reboot;
+ start_monitor;
+ wait_for_monitor $sleep_time;
+ end_monitor;
+ }
+
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "KTEST RESULT: TEST $i Failed: ", @_, "\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+
+ return 1 if (!defined($store_failures));
+
+ my @t = localtime;
+ my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+ 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+
+ my $type = $build_type;
+ if ($type =~ /useconfig/) {
+ $type = "useconfig";
+ }
+
+ my $dir = "$machine-$test_type-$type-fail-$date";
+ my $faildir = "$store_failures/$dir";
+
+ if (!-d $faildir) {
+ mkpath($faildir) or
+ die "can't create $faildir";
+ }
+ if (-f "$output_config") {
+ cp "$output_config", "$faildir/config" or
+ die "failed to copy .config";
+ }
+ if (-f $buildlog) {
+ cp $buildlog, "$faildir/buildlog" or
+ die "failed to move $buildlog";
+ }
+ if (-f $dmesg) {
+ cp $dmesg, "$faildir/dmesg" or
+ die "failed to move $dmesg";
+ }
+
+ doprint "*** Saved info to $faildir ***\n";
+
+ return 1;
+}
+
+sub run_command {
+ my ($command) = @_;
+ my $dolog = 0;
+ my $dord = 0;
+ my $pid;
+
+ $command =~ s/\$SSH_USER/$ssh_user/g;
+ $command =~ s/\$MACHINE/$machine/g;
+
+ doprint("$command ... ");
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ (fail "unable to exec $command" and return 0);
+
+ if (defined($opt{"LOG_FILE"})) {
+ open(LOG, ">>$opt{LOG_FILE}") or
+ dodie "failed to write to log";
+ $dolog = 1;
+ }
+
+ if (defined($redirect)) {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+
+ while (<CMD>) {
+ print LOG if ($dolog);
+ print RD if ($dord);
+ }
+
+ waitpid($pid, 0);
+ my $failed = $?;
+
+ close(CMD);
+ close(LOG) if ($dolog);
+ close(RD) if ($dord);
+
+ if ($failed) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$failed;
+}
+
+sub run_ssh {
+ my ($cmd) = @_;
+ my $cp_exec = $ssh_exec;
+
+ $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
+ return run_command "$cp_exec";
+}
+
+sub run_scp {
+ my ($src, $dst) = @_;
+ my $cp_scp = $scp_to_target;
+
+ $cp_scp =~ s/\$SRC_FILE/$src/g;
+ $cp_scp =~ s/\$DST_FILE/$dst/g;
+
+ return run_command "$cp_scp";
+}
+
+sub get_grub_index {
+
+ if ($reboot_type ne "grub") {
+ return;
+ }
+ return if (defined($grub_number));
+
+ doprint "Find grub menu ... ";
+ $grub_number = -1;
+
+ my $ssh_grub = $ssh_exec;
+ $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+
+ open(IN, "$ssh_grub |")
+ or die "unable to get menu.lst";
+
+ while (<IN>) {
+ if (/^\s*title\s+$grub_menu\s*$/) {
+ $grub_number++;
+ last;
+ } elsif (/^\s*title\s/) {
+ $grub_number++;
+ }
+ }
+ close(IN);
+
+ die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+ if ($grub_number < 0);
+ doprint "$grub_number\n";
+}
+
+sub wait_for_input
+{
+ my ($fp, $time) = @_;
+ my $rin;
+ my $ready;
+ my $line;
+ my $ch;
+
+ if (!defined($time)) {
+ $time = $timeout;
+ }
+
+ $rin = '';
+ vec($rin, fileno($fp), 1) = 1;
+ $ready = select($rin, undef, undef, $time);
+
+ $line = "";
+
+ # try to read one char at a time
+ while (sysread $fp, $ch, 1) {
+ $line .= $ch;
+ last if ($ch eq "\n");
+ }
+
+ if (!length($line)) {
+ return undef;
+ }
+
+ return $line;
+}
+
+sub reboot_to {
+ if ($reboot_type eq "grub") {
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'";
+ return;
+ }
+
+ run_command "$reboot_script";
+}
+
+sub get_sha1 {
+ my ($commit) = @_;
+
+ doprint "git rev-list --max-count=1 $commit ... ";
+ my $sha1 = `git rev-list --max-count=1 $commit`;
+ my $ret = $?;
+
+ logit $sha1;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to get git $commit";
+ }
+
+ print "SUCCESS\n";
+
+ chomp $sha1;
+
+ return $sha1;
+}
+
+sub monitor {
+ my $booted = 0;
+ my $bug = 0;
+ my $skip_call_trace = 0;
+ my $loops;
+
+ wait_for_monitor 5;
+
+ my $line;
+ my $full_line = "";
+
+ open(DMESG, "> $dmesg") or
+ die "unable to write to $dmesg";
+
+ reboot_to;
+
+ my $success_start;
+ my $failure_start;
+
+ for (;;) {
+
+ if ($booted) {
+ $line = wait_for_input($monitor_fp, $booted_timeout);
+ } else {
+ $line = wait_for_input($monitor_fp);
+ }
+
+ last if (!defined($line));
+
+ doprint $line;
+ print DMESG $line;
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+
+ if ($full_line =~ /$success_line/) {
+ $booted = 1;
+ $success_start = time;
+ }
+
+ if ($booted && defined($stop_after_success) &&
+ $stop_after_success >= 0) {
+ my $now = time;
+ if ($now - $success_start >= $stop_after_success) {
+ doprint "Test forced to stop after $stop_after_success seconds after success\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ backtrace testing \]/) {
+ $skip_call_trace = 1;
+ }
+
+ if ($full_line =~ /call trace:/i) {
+ if (!$skip_call_trace) {
+ $bug = 1;
+ $failure_start = time;
+ }
+ }
+
+ if ($bug && defined($stop_after_failure) &&
+ $stop_after_failure >= 0) {
+ my $now = time;
+ if ($now - $failure_start >= $stop_after_failure) {
+ doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ end of backtrace testing \]/) {
+ $skip_call_trace = 0;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
+
+ close(DMESG);
+
+ if ($bug) {
+ return 0 if ($in_bisect);
+ fail "failed - got a bug report" and return 0;
+ }
+
+ if (!$booted) {
+ return 0 if ($in_bisect);
+ fail "failed - never got a boot prompt." and return 0;
+ }
+
+ return 1;
+}
+
+sub install {
+
+ run_scp "$outputdir/$build_target", "$target_image" or
+ dodie "failed to copy image";
+
+ my $install_mods = 0;
+
+ # should we process modules?
+ $install_mods = 0;
+ open(IN, "$output_config") or dodie("Can't read config file");
+ while (<IN>) {
+ if (/CONFIG_MODULES(=y)?/) {
+ $install_mods = 1 if (defined($1));
+ last;
+ }
+ }
+ close(IN);
+
+ if (!$install_mods) {
+ doprint "No modules needed\n";
+ return;
+ }
+
+ run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or
+ dodie "Failed to install modules";
+
+ my $modlib = "/lib/modules/$version";
+ my $modtar = "ktest-mods.tar.bz2";
+
+ run_ssh "rm -rf $modlib" or
+ dodie "failed to remove old mods: $modlib";
+
+ # would be nice if scp -r did not follow symbolic links
+ run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+ dodie "making tarball";
+
+ run_scp "$tmpdir/$modtar", "/tmp" or
+ dodie "failed to copy modules";
+
+ unlink "$tmpdir/$modtar";
+
+ run_ssh "'(cd / && tar xf /tmp/$modtar)'" or
+ dodie "failed to tar modules";
+
+ run_ssh "rm -f /tmp/$modtar";
+
+ return if (!defined($post_install));
+
+ my $cp_post_install = $post_install;
+ $cp_post_install = s/\$KERNEL_VERSION/$version/g;
+ run_command "$cp_post_install" or
+ dodie "Failed to run post install";
+}
+
+sub check_buildlog {
+ my ($patch) = @_;
+
+ my @files = `git show $patch | diffstat -l`;
+
+ open(IN, "git show $patch |") or
+ dodie "failed to show $patch";
+ while (<IN>) {
+ if (m,^--- a/(.*),) {
+ chomp $1;
+ $files[$#files] = $1;
+ }
+ }
+ close(IN);
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+ if (/^\s*(.*?):.*(warning|error)/) {
+ my $err = $1;
+ foreach my $file (@files) {
+ my $fullpath = "$builddir/$file";
+ if ($file eq $err || $fullpath eq $err) {
+ fail "$file built with warnings" and return 0;
+ }
+ }
+ }
+ }
+ close(IN);
+
+ return 1;
+}
+
+sub build {
+ my ($type) = @_;
+ my $defconfig = "";
+
+ unlink $buildlog;
+
+ if ($type =~ /^useconfig:(.*)/) {
+ run_command "cp $1 $output_config" or
+ dodie "could not copy $1 to .config";
+
+ $type = "oldconfig";
+ }
+
+ # old config can ask questions
+ if ($type eq "oldconfig") {
+ $type = "oldnoconfig";
+
+ # allow for empty configs
+ run_command "touch $output_config";
+
+ run_command "mv $output_config $outputdir/config_temp" or
+ dodie "moving .config";
+
+ if (!$noclean && !run_command "$make mrproper") {
+ dodie "make mrproper";
+ }
+
+ run_command "mv $outputdir/config_temp $output_config" or
+ dodie "moving config_temp";
+
+ } elsif (!$noclean) {
+ unlink "$output_config";
+ run_command "$make mrproper" or
+ dodie "make mrproper";
+ }
+
+ # add something to distinguish this build
+ open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+ print OUT "$localversion\n";
+ close(OUT);
+
+ if (defined($minconfig)) {
+ $defconfig = "KCONFIG_ALLCONFIG=$minconfig";
+ }
+
+ run_command "$defconfig $make $type" or
+ dodie "failed make config";
+
+ $redirect = "$buildlog";
+ if (!run_command "$make $build_options") {
+ undef $redirect;
+ # bisect may need this to pass
+ return 0 if ($in_bisect);
+ fail "failed build" and return 0;
+ }
+ undef $redirect;
+
+ return 1;
+}
+
+sub halt {
+ if (!run_ssh "halt" or defined($power_off)) {
+ if (defined($poweroff_after_halt)) {
+ sleep $poweroff_after_halt;
+ run_command "$power_off";
+ }
+ } else {
+ # nope? the zap it!
+ run_command "$power_off";
+ }
+}
+
+sub success {
+ my ($i) = @_;
+
+ $successes++;
+
+ doprint "\n\n*******************************************\n";
+ doprint "*******************************************\n";
+ doprint "KTEST RESULT: TEST $i SUCCESS!!!! **\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
+
+ if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
+ doprint "Reboot and wait $sleep_time seconds\n";
+ reboot;
+ start_monitor;
+ wait_for_monitor $sleep_time;
+ end_monitor;
+ }
+}
+
+sub get_version {
+ # get the release name
+ doprint "$make kernelrelease ... ";
+ $version = `$make kernelrelease | tail -1`;
+ chomp($version);
+ doprint "$version\n";
+}
+
+sub child_run_test {
+ my $failed = 0;
+
+ # child should have no power
+ $reboot_on_error = 0;
+ $poweroff_on_error = 0;
+ $die_on_failure = 1;
+
+ run_command $run_test or $failed = 1;
+ exit $failed;
+}
+
+my $child_done;
+
+sub child_finished {
+ $child_done = 1;
+}
+
+sub do_run_test {
+ my $child_pid;
+ my $child_exit;
+ my $line;
+ my $full_line;
+ my $bug = 0;
+
+ wait_for_monitor 1;
+
+ doprint "run test $run_test\n";
+
+ $child_done = 0;
+
+ $SIG{CHLD} = qw(child_finished);
+
+ $child_pid = fork;
+
+ child_run_test if (!$child_pid);
+
+ $full_line = "";
+
+ do {
+ $line = wait_for_input($monitor_fp, 1);
+ if (defined($line)) {
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+
+ if ($full_line =~ /call trace:/i) {
+ $bug = 1;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
+ } while (!$child_done && !$bug);
+
+ if ($bug) {
+ doprint "Detected kernel crash!\n";
+ # kill the child with extreme prejudice
+ kill 9, $child_pid;
+ }
+
+ waitpid $child_pid, 0;
+ $child_exit = $?;
+
+ if ($bug || $child_exit) {
+ return 0 if $in_bisect;
+ fail "test failed" and return 0;
+ }
+ return 1;
+}
+
+sub run_git_bisect {
+ my ($command) = @_;
+
+ doprint "$command ... ";
+
+ my $output = `$command 2>&1`;
+ my $ret = $?;
+
+ logit $output;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to git bisect";
+ }
+
+ doprint "SUCCESS\n";
+ if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+ doprint "$1 [$2]\n";
+ } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+ $bisect_bad = $1;
+ doprint "Found bad commit... $1\n";
+ return 0;
+ } else {
+ # we already logged it, just print it now.
+ print $output;
+ }
+
+ return 1;
+}
+
+# returns 1 on success, 0 on failure
+sub run_bisect_test {
+ my ($type, $buildtype) = @_;
+
+ my $failed = 0;
+ my $result;
+ my $output;
+ my $ret;
+
+ $in_bisect = 1;
+
+ build $buildtype or $failed = 1;
+
+ if ($type ne "build") {
+ dodie "Failed on build" if $failed;
+
+ # Now boot the box
+ get_grub_index;
+ get_version;
+ install;
+
+ start_monitor;
+ monitor or $failed = 1;
+
+ if ($type ne "boot") {
+ dodie "Failed on boot" if $failed;
+
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ }
+
+ if ($failed) {
+ $result = 0;
+
+ # reboot the box to a good kernel
+ if ($type ne "build") {
+ doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+ reboot;
+ start_monitor;
+ wait_for_monitor $bisect_sleep_time;
+ end_monitor;
+ }
+ } else {
+ $result = 1;
+ }
+ $in_bisect = 0;
+
+ return $result;
+}
+
+sub run_bisect {
+ my ($type) = @_;
+ my $buildtype = "oldconfig";
+
+ # We should have a minconfig to use?
+ if (defined($minconfig)) {
+ $buildtype = "useconfig:$minconfig";
+ }
+
+ my $ret = run_bisect_test $type, $buildtype;
+
+
+ # Are we looking for where it worked, not failed?
+ if ($reverse_bisect) {
+ $ret = !$ret;
+ }
+
+ if ($ret) {
+ return "good";
+ } else {
+ return "bad";
+ }
+}
+
+sub bisect {
+ my ($i) = @_;
+
+ my $result;
+
+ die "BISECT_GOOD[$i] not defined\n" if (!defined($opt{"BISECT_GOOD[$i]"}));
+ die "BISECT_BAD[$i] not defined\n" if (!defined($opt{"BISECT_BAD[$i]"}));
+ die "BISECT_TYPE[$i] not defined\n" if (!defined($opt{"BISECT_TYPE[$i]"}));
+
+ my $good = $opt{"BISECT_GOOD[$i]"};
+ my $bad = $opt{"BISECT_BAD[$i]"};
+ my $type = $opt{"BISECT_TYPE[$i]"};
+ my $start = $opt{"BISECT_START[$i]"};
+ my $replay = $opt{"BISECT_REPLAY[$i]"};
+
+ # convert to true sha1's
+ $good = get_sha1($good);
+ $bad = get_sha1($bad);
+
+ if (defined($opt{"BISECT_REVERSE[$i]"}) &&
+ $opt{"BISECT_REVERSE[$i]"} == 1) {
+ doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+ $reverse_bisect = 1;
+ } else {
+ $reverse_bisect = 0;
+ }
+
+ # Can't have a test without having a test to run
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ my $check = $opt{"BISECT_CHECK[$i]"};
+ if (defined($check) && $check ne "0") {
+
+ # get current HEAD
+ my $head = get_sha1("HEAD");
+
+ if ($check ne "good") {
+ doprint "TESTING BISECT BAD [$bad]\n";
+ run_command "git checkout $bad" or
+ die "Failed to checkout $bad";
+
+ $result = run_bisect $type;
+
+ if ($result ne "bad") {
+ fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+ }
+ }
+
+ if ($check ne "bad") {
+ doprint "TESTING BISECT GOOD [$good]\n";
+ run_command "git checkout $good" or
+ die "Failed to checkout $good";
+
+ $result = run_bisect $type;
+
+ if ($result ne "good") {
+ fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+ }
+ }
+
+ # checkout where we started
+ run_command "git checkout $head" or
+ die "Failed to checkout $head";
+ }
+
+ run_command "git bisect start" or
+ dodie "could not start bisect";
+
+ run_command "git bisect good $good" or
+ dodie "could not set bisect good to $good";
+
+ run_git_bisect "git bisect bad $bad" or
+ dodie "could not set bisect bad to $bad";
+
+ if (defined($replay)) {
+ run_command "git bisect replay $replay" or
+ dodie "failed to run replay";
+ }
+
+ if (defined($start)) {
+ run_command "git checkout $start" or
+ dodie "failed to checkout $start";
+ }
+
+ my $test;
+ do {
+ $result = run_bisect $type;
+ $test = run_git_bisect "git bisect $result";
+ } while ($test);
+
+ run_command "git bisect log" or
+ dodie "could not capture git bisect log";
+
+ run_command "git bisect reset" or
+ dodie "could not reset git bisect";
+
+ doprint "Bad commit was [$bisect_bad]\n";
+
+ success $i;
+}
+
+my %config_ignore;
+my %config_set;
+
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
+ while (<IN>) {
+ if (/^(.*?(CONFIG\S*)(=.*| is not set))/) {
+ $config_ignore{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub read_current_config {
+ my ($config_ref) = @_;
+
+ %{$config_ref} = ();
+ undef %{$config_ref};
+
+ my @key = keys %{$config_ref};
+ if ($#key >= 0) {
+ print "did not delete!\n";
+ exit;
+ }
+ open (IN, "$output_config");
+
+ while (<IN>) {
+ if (/^(CONFIG\S+)=(.*)/) {
+ ${$config_ref}{$1} = $2;
+ }
+ }
+ close(IN);
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub create_config {
+ my @configs = @_;
+
+ open(OUT, ">$output_config") or dodie "Can not write to $output_config";
+
+ foreach my $config (@configs) {
+ print OUT "$config_set{$config}\n";
+ my @deps = get_dependencies $config;
+ foreach my $dep (@deps) {
+ print OUT "$config_set{$dep}\n";
+ }
+ }
+
+ foreach my $config (keys %config_ignore) {
+ print OUT "$config_ignore{$config}\n";
+ }
+ close(OUT);
+
+# exit;
+ run_command "$make oldnoconfig" or
+ dodie "failed make config oldconfig";
+
+}
+
+sub compare_configs {
+ my (%a, %b) = @_;
+
+ foreach my $item (keys %a) {
+ if (!defined($b{$item})) {
+ print "diff $item\n";
+ return 1;
+ }
+ delete $b{$item};
+ }
+
+ my @keys = keys %b;
+ if ($#keys) {
+ print "diff2 $keys[0]\n";
+ }
+ return -1 if ($#keys >= 0);
+
+ return 0;
+}
+
+sub run_config_bisect_test {
+ my ($type) = @_;
+
+ return run_bisect_test $type, "oldconfig";
+}
+
+sub process_passed {
+ my (%configs) = @_;
+
+ doprint "These configs had no failure: (Enabling them for further compiles)\n";
+ # Passed! All these configs are part of a good compile.
+ # Add them to the min options.
+ foreach my $config (keys %configs) {
+ if (defined($config_list{$config})) {
+ doprint " removing $config\n";
+ $config_ignore{$config} = $config_list{$config};
+ delete $config_list{$config};
+ }
+ }
+ doprint "config copied to $outputdir/config_good\n";
+ run_command "cp -f $output_config $outputdir/config_good";
+}
+
+sub process_failed {
+ my ($config) = @_;
+
+ doprint "\n\n***************************************\n";
+ doprint "Found bad config: $config\n";
+ doprint "***************************************\n\n";
+}
+
+sub run_config_bisect {
+
+ my @start_list = keys %config_list;
+
+ if ($#start_list < 0) {
+ doprint "No more configs to test!!!\n";
+ return -1;
+ }
+
+ doprint "***** RUN TEST ***\n";
+ my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"};
+ my $ret;
+ my %current_config;
+
+ my $count = $#start_list + 1;
+ doprint " $count configs to test\n";
+
+ my $half = int($#start_list / 2);
+
+ do {
+ my @tophalf = @start_list[0 .. $half];
+
+ create_config @tophalf;
+ read_current_config \%current_config;
+
+ $count = $#tophalf + 1;
+ doprint "Testing $count configs\n";
+ my $found = 0;
+ # make sure we test something
+ foreach my $config (@tophalf) {
+ if (defined($current_config{$config})) {
+ logit " $config\n";
+ $found = 1;
+ }
+ }
+ if (!$found) {
+ # try the other half
+ doprint "Top half produced no set configs, trying bottom half\n";
+ @tophalf = @start_list[$half .. $#start_list];
+ create_config @tophalf;
+ read_current_config \%current_config;
+ foreach my $config (@tophalf) {
+ if (defined($current_config{$config})) {
+ logit " $config\n";
+ $found = 1;
+ }
+ }
+ if (!$found) {
+ doprint "Failed: Can't make new config with current configs\n";
+ foreach my $config (@start_list) {
+ doprint " CONFIG: $config\n";
+ }
+ return -1;
+ }
+ $count = $#tophalf + 1;
+ doprint "Testing $count configs\n";
+ }
+
+ $ret = run_config_bisect_test $type;
+
+ if ($ret) {
+ process_passed %current_config;
+ return 0;
+ }
+
+ doprint "This config had a failure.\n";
+ doprint "Removing these configs that were not set in this config:\n";
+ doprint "config copied to $outputdir/config_bad\n";
+ run_command "cp -f $output_config $outputdir/config_bad";
+
+ # A config exists in this group that was bad.
+ foreach my $config (keys %config_list) {
+ if (!defined($current_config{$config})) {
+ doprint " removing $config\n";
+ delete $config_list{$config};
+ }
+ }
+
+ @start_list = @tophalf;
+
+ if ($#start_list == 0) {
+ process_failed $start_list[0];
+ return 1;
+ }
+
+ # remove half the configs we are looking at and see if
+ # they are good.
+ $half = int($#start_list / 2);
+ } while ($half > 0);
+
+ # we found a single config, try it again
+ my @tophalf = @start_list[0 .. 0];
+
+ $ret = run_config_bisect_test $type;
+ if ($ret) {
+ process_passed %current_config;
+ return 0;
+ }
+
+ process_failed $start_list[0];
+ return 1;
+}
+
+sub config_bisect {
+ my ($i) = @_;
+
+ my $start_config = $opt{"CONFIG_BISECT[$i]"};
+
+ my $tmpconfig = "$tmpdir/use_config";
+
+ # Make the file with the bad config and the min config
+ if (defined($minconfig)) {
+ # read the min config for things to ignore
+ run_command "cp $minconfig $tmpconfig" or
+ dodie "failed to copy $minconfig to $tmpconfig";
+ } else {
+ unlink $tmpconfig;
+ }
+
+ # Add other configs
+ if (defined($addconfig)) {
+ run_command "cat $addconfig >> $tmpconfig" or
+ dodie "failed to append $addconfig";
+ }
+
+ my $defconfig = "";
+ if (-f $tmpconfig) {
+ $defconfig = "KCONFIG_ALLCONFIG=$tmpconfig";
+ process_config_ignore $tmpconfig;
+ }
+
+ # now process the start config
+ run_command "cp $start_config $output_config" or
+ dodie "failed to copy $start_config to $output_config";
+
+ # read directly what we want to check
+ my %config_check;
+ open (IN, $output_config)
+ or dodie "faied to open $output_config";
+
+ while (<IN>) {
+ if (/^((CONFIG\S*)=.*)/) {
+ $config_check{$2} = $1;
+ }
+ }
+ close(IN);
+
+ # Now run oldconfig with the minconfig (and addconfigs)
+ run_command "$defconfig $make oldnoconfig" or
+ dodie "failed make config oldconfig";
+
+ # check to see what we lost (or gained)
+ open (IN, $output_config)
+ or dodie "Failed to read $start_config";
+
+ my %removed_configs;
+ my %added_configs;
+
+ while (<IN>) {
+ if (/^((CONFIG\S*)=.*)/) {
+ # save off all options
+ $config_set{$2} = $1;
+ if (defined($config_check{$2})) {
+ if (defined($config_ignore{$2})) {
+ $removed_configs{$2} = $1;
+ } else {
+ $config_list{$2} = $1;
+ }
+ } elsif (!defined($config_ignore{$2})) {
+ $added_configs{$2} = $1;
+ $config_list{$2} = $1;
+ }
+ }
+ }
+ close(IN);
+
+ my @confs = keys %removed_configs;
+ if ($#confs >= 0) {
+ doprint "Configs overridden by default configs and removed from check:\n";
+ foreach my $config (@confs) {
+ doprint " $config\n";
+ }
+ }
+ @confs = keys %added_configs;
+ if ($#confs >= 0) {
+ doprint "Configs appearing in make oldconfig and added:\n";
+ foreach my $config (@confs) {
+ doprint " $config\n";
+ }
+ }
+
+ my %config_test;
+ my $once = 0;
+
+ # Sometimes kconfig does weird things. We must make sure
+ # that the config we autocreate has everything we need
+ # to test, otherwise we may miss testing configs, or
+ # may not be able to create a new config.
+ # Here we create a config with everything set.
+ create_config (keys %config_list);
+ read_current_config \%config_test;
+ foreach my $config (keys %config_list) {
+ if (!defined($config_test{$config})) {
+ if (!$once) {
+ $once = 1;
+ doprint "Configs not produced by kconfig (will not be checked):\n";
+ }
+ doprint " $config\n";
+ delete $config_list{$config};
+ }
+ }
+ my $ret;
+ do {
+ $ret = run_config_bisect;
+ } while (!$ret);
+
+ return $ret if ($ret < 0);
+
+ success $i;
+}
+
+sub patchcheck {
+ my ($i) = @_;
+
+ die "PATCHCHECK_START[$i] not defined\n"
+ if (!defined($opt{"PATCHCHECK_START[$i]"}));
+ die "PATCHCHECK_TYPE[$i] not defined\n"
+ if (!defined($opt{"PATCHCHECK_TYPE[$i]"}));
+
+ my $start = $opt{"PATCHCHECK_START[$i]"};
+
+ my $end = "HEAD";
+ if (defined($opt{"PATCHCHECK_END[$i]"})) {
+ $end = $opt{"PATCHCHECK_END[$i]"};
+ }
+
+ # Get the true sha1's since we can use things like HEAD~3
+ $start = get_sha1($start);
+ $end = get_sha1($end);
+
+ my $type = $opt{"PATCHCHECK_TYPE[$i]"};
+
+ # Can't have a test without having a test to run
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ open (IN, "git log --pretty=oneline $end|") or
+ dodie "could not get git list";
+
+ my @list;
+
+ while (<IN>) {
+ chomp;
+ $list[$#list+1] = $_;
+ last if (/^$start/);
+ }
+ close(IN);
+
+ if ($list[$#list] !~ /^$start/) {
+ fail "SHA1 $start not found";
+ }
+
+ # go backwards in the list
+ @list = reverse @list;
+
+ my $save_clean = $noclean;
+
+ $in_patchcheck = 1;
+ foreach my $item (@list) {
+ my $sha1 = $item;
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+
+ doprint "\nProcessing commit $item\n\n";
+
+ run_command "git checkout $sha1" or
+ die "Failed to checkout $sha1";
+
+ # only clean on the first and last patch
+ if ($item eq $list[0] ||
+ $item eq $list[$#list]) {
+ $noclean = $save_clean;
+ } else {
+ $noclean = 1;
+ }
+
+ if (defined($minconfig)) {
+ build "useconfig:$minconfig" or return 0;
+ } else {
+ # ?? no config to use?
+ build "oldconfig" or return 0;
+ }
+
+ check_buildlog $sha1 or return 0;
+
+ next if ($type eq "build");
+
+ get_grub_index;
+ get_version;
+ install;
+
+ my $failed = 0;
+
+ start_monitor;
+ monitor or $failed = 1;
+
+ if (!$failed && $type ne "boot"){
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ return 0 if ($failed);
+
+ }
+ $in_patchcheck = 0;
+ success $i;
+
+ return 1;
+}
+
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl config-file\n";
+
+if ($#ARGV == 0) {
+ $ktest_config = $ARGV[0];
+ if (! -f $ktest_config) {
+ print "$ktest_config does not exist.\n";
+ my $ans;
+ for (;;) {
+ print "Create it? [Y/n] ";
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ $ans = "y";
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ if ($ans !~ /^y$/i) {
+ exit 0;
+ }
+ }
+} else {
+ $ktest_config = "ktest.conf";
+}
+
+if (! -f $ktest_config) {
+ open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+ print OUT << "EOF"
+# Generated by ktest.pl
+#
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+
+DEFAULTS
+EOF
+;
+ close(OUT);
+}
+read_config $ktest_config;
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+ print "\nAppending entered in configs to $ktest_config\n";
+ open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+ foreach my $config (@new_configs) {
+ print OUT "$config = $entered_configs{$config}\n";
+ $opt{$config} = $entered_configs{$config};
+ }
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+ unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+ if (!$i) {
+ doprint "DEFAULT OPTIONS:\n";
+ } else {
+ doprint "\nTEST $i OPTIONS";
+ if (defined($repeat_tests{$i})) {
+ $repeat = $repeat_tests{$i};
+ doprint " ITERATE $repeat";
+ }
+ doprint "\n";
+ }
+
+ foreach my $option (sort keys %opt) {
+
+ if ($option =~ /\[(\d+)\]$/) {
+ next if ($i != $1);
+ } else {
+ next if ($i);
+ }
+
+ doprint "$option = $opt{$option}\n";
+ }
+}
+
+sub set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = "$name\[$i\]";
+
+ if (defined($opt{$option})) {
+ return $opt{$option};
+ }
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+ $option = "$name\[$test\]";
+ if (defined($opt{$option})) {
+ return $opt{$option};
+ }
+ }
+ }
+
+ if (defined($opt{$name})) {
+ return $opt{$name};
+ }
+
+ return undef;
+}
+
+# First we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+ $iteration = $i;
+
+ my $makecmd = set_test_option("MAKE_CMD", $i);
+
+ $machine = set_test_option("MACHINE", $i);
+ $ssh_user = set_test_option("SSH_USER", $i);
+ $tmpdir = set_test_option("TMP_DIR", $i);
+ $outputdir = set_test_option("OUTPUT_DIR", $i);
+ $builddir = set_test_option("BUILD_DIR", $i);
+ $test_type = set_test_option("TEST_TYPE", $i);
+ $build_type = set_test_option("BUILD_TYPE", $i);
+ $build_options = set_test_option("BUILD_OPTIONS", $i);
+ $power_cycle = set_test_option("POWER_CYCLE", $i);
+ $reboot = set_test_option("REBOOT", $i);
+ $noclean = set_test_option("BUILD_NOCLEAN", $i);
+ $minconfig = set_test_option("MIN_CONFIG", $i);
+ $run_test = set_test_option("TEST", $i);
+ $addconfig = set_test_option("ADD_CONFIG", $i);
+ $reboot_type = set_test_option("REBOOT_TYPE", $i);
+ $grub_menu = set_test_option("GRUB_MENU", $i);
+ $post_install = set_test_option("POST_INSTALL", $i);
+ $reboot_script = set_test_option("REBOOT_SCRIPT", $i);
+ $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
+ $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
+ $die_on_failure = set_test_option("DIE_ON_FAILURE", $i);
+ $power_off = set_test_option("POWER_OFF", $i);
+ $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i);
+ $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
+ $sleep_time = set_test_option("SLEEP_TIME", $i);
+ $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
+ $store_failures = set_test_option("STORE_FAILURES", $i);
+ $timeout = set_test_option("TIMEOUT", $i);
+ $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i);
+ $console = set_test_option("CONSOLE", $i);
+ $success_line = set_test_option("SUCCESS_LINE", $i);
+ $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
+ $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
+ $build_target = set_test_option("BUILD_TARGET", $i);
+ $ssh_exec = set_test_option("SSH_EXEC", $i);
+ $scp_to_target = set_test_option("SCP_TO_TARGET", $i);
+ $target_image = set_test_option("TARGET_IMAGE", $i);
+ $localversion = set_test_option("LOCALVERSION", $i);
+
+ chdir $builddir || die "can't change directory to $builddir";
+
+ if (!-d $tmpdir) {
+ mkpath($tmpdir) or
+ die "can't create $tmpdir";
+ }
+
+ $ENV{"SSH_USER"} = $ssh_user;
+ $ENV{"MACHINE"} = $machine;
+
+ $target = "$ssh_user\@$machine";
+
+ $buildlog = "$tmpdir/buildlog-$machine";
+ $dmesg = "$tmpdir/dmesg-$machine";
+ $make = "$makecmd O=$outputdir";
+ $output_config = "$outputdir/.config";
+
+ if ($reboot_type eq "grub") {
+ dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+ } elsif (!defined($reboot_script)) {
+ dodie "REBOOT_SCRIPT not defined"
+ }
+
+ my $run_type = $build_type;
+ if ($test_type eq "patchcheck") {
+ $run_type = $opt{"PATCHCHECK_TYPE[$i]"};
+ } elsif ($test_type eq "bisect") {
+ $run_type = $opt{"BISECT_TYPE[$i]"};
+ } elsif ($test_type eq "config_bisect") {
+ $run_type = $opt{"CONFIG_BISECT_TYPE[$i]"};
+ }
+
+ # mistake in config file?
+ if (!defined($run_type)) {
+ $run_type = "ERROR";
+ }
+
+ doprint "\n\n";
+ doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+
+ unlink $dmesg;
+ unlink $buildlog;
+
+ if (!defined($minconfig)) {
+ $minconfig = $addconfig;
+
+ } elsif (defined($addconfig)) {
+ run_command "cat $addconfig $minconfig > $tmpdir/add_config" or
+ dodie "Failed to create temp config";
+ $minconfig = "$tmpdir/add_config";
+ }
+
+ my $checkout = $opt{"CHECKOUT[$i]"};
+ if (defined($checkout)) {
+ run_command "git checkout $checkout" or
+ die "failed to checkout $checkout";
+ }
+
+ if ($test_type eq "bisect") {
+ bisect $i;
+ next;
+ } elsif ($test_type eq "config_bisect") {
+ config_bisect $i;
+ next;
+ } elsif ($test_type eq "patchcheck") {
+ patchcheck $i;
+ next;
+ }
+
+ if ($build_type ne "nobuild") {
+ build $build_type or next;
+ }
+
+ if ($test_type ne "build") {
+ get_grub_index;
+ get_version;
+ install;
+
+ my $failed = 0;
+ start_monitor;
+ monitor or $failed = 1;;
+
+ if (!$failed && $test_type ne "boot" && defined($run_test)) {
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ next if ($failed);
+ }
+
+ success $i;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+ halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) {
+ reboot;
+}
+
+doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 00000000000..3408c594b2d
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,622 @@
+#
+# Config file for ktest.pl
+#
+# Note, all paths must be absolute
+#
+
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overriden by test specific
+# options, with the following exceptions:
+#
+# LOG_FILE
+# CLEAR_LOG
+# POWEROFF_ON_SUCCESS
+# REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# only enabling the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test-1. Then 10 tests will be executed
+# with MIN_CONFIG with /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+
+
+#### Mandatory Default Options ####
+
+# These options must be in the default section, although most
+# may be overridden by test options.
+
+# The machine hostname that you will test
+#MACHINE = target
+
+# The box is expected to have ssh on normal bootup, provide the user
+# (most likely root, since you need privileged operations)
+#SSH_USER = root
+
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+
+# The directory that the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+
+# The script or command that reads the console
+#
+# If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE = virsh console Guest
+
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#GRUB_MENU = Test Kernel
+
+# A script to reboot the target into the test kernel
+# (Only mandatory if REBOOT_TYPE = script)
+#REBOOT_SCRIPT =
+
+#### Optional Config Options (all have defaults) ####
+
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+
+# The default test type (default test)
+# The test types may be:
+# build - only build the kernel, do nothing else
+# boot - build and boot the kernel
+# test - build, boot and if TEST is set, run the test script
+# (If TEST is not set, it defaults back to boot)
+# bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+
+# The build type is any make config type or special command
+# (default randconfig)
+# nobuild - skip the clean and build step
+# useconfig:/path/to/config - use the given config and run
+# oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+# $ ssh target
+# $ lsmod > mymods
+# $ scp mymods host:/tmp
+# $ exit
+# $ cd linux.git
+# $ rm .config
+# $ make LSMOD=mymods localyesconfig
+# $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+# log in directly to target (do not ssh)
+#
+# $ su
+# # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+# repeat the above several times
+#
+# # lsmod > mymods
+# # reboot
+#
+# May need to reboot to get your network back to copy the mymods
+# to the host, and then remove the previous .config and run the
+# localyesconfig again. The CONFIG_MIN generated like this will
+# not guarantee network activity to the box so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+# CONFIG_CMDLINE="<your options here>"
+# randconfig may set the above and override your real command
+# line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+
+# Sometimes there's options that just break the boot and
+# you do not care about. Here are a few:
+# # CONFIG_STAGING is not set
+# Staging drivers are horrible, and can break the build.
+# # CONFIG_SCSI_DEBUG is not set
+# SCSI_DEBUG may change your root partition
+# # CONFIG_KGDB_SERIAL_CONSOLE is not set
+# KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+
+# The location on the host where to write temp files
+# (default /tmp/ktest)
+#TMP_DIR = /tmp/ktest
+
+# Optional log file to write the status (recommended)
+# Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+
+# Remove old logfile if it exists before starting all tests.
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regural expression syntax like:
+# (do not add any quotes around it)
+#
+# SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is not set.
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
+
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+
+# The timeout in seconds when we consider the box hung after
+# the console stop producing output. Be sure to leave enough
+# time here to get pass a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may not produce a large lag on reboot
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+# (default 60)
+#SLEEP_TIME = 60
+
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+
+# Power off the target after all tests have completed successfully
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+
+# Reboot the target after all test completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+
+# In case there are isses with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+
+# In case there's isses with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest call "Guest".
+#POWER_OFF = virsh destroy Guest
+
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND";)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND";
+
+# The way to copy a file to the target
+# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
+# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined.
+#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE
+
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+# some of these options are required for TEST_TYPE of patchcheck
+# and bisect.
+#
+#
+# CHECKOUT = branch
+#
+# If the BUILD_DIR is a git repository, then you can set this option
+# to checkout the given branch before running the TEST. If you
+# specify this for the first run, that branch will be used for
+# all preceding tests until a new CHECKOUT is set.
+#
+#
+#
+# For TEST_TYPE = patchcheck
+#
+# This expects the BUILD_DIR to be a git repository, and
+# will checkout the PATCHCHECK_START commit.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+# used for patchcheck is oldconfig.
+#
+# PATCHCHECK_START is required and is the first patch to
+# test (the SHA1 of the commit). You may also specify anything
+# that git checkout allows (branch name, tage, HEAD~3).
+#
+# PATCHCHECK_END is the last patch to check (default HEAD)
+#
+# PATCHCHECK_TYPE is required and is the type of test to run:
+# build, boot, test.
+#
+# Note, the build test will look for warnings, if a warning occurred
+# in a file that a commit touches, the build will fail.
+#
+# If BUILD_NOCLEAN is set, then make mrproper will not be run on
+# any of the builds, just like all other TEST_TYPE tests. But
+# what makes patchcheck different from the other tests, is if
+# BUILD_NOCLEAN is not set, only the first and last patch run
+# make mrproper. This helps speed up the test.
+#
+# Example:
+# TEST_START
+# TEST_TYPE = patchcheck
+# CHECKOUT = mybranch
+# PATCHCHECK_TYPE = boot
+# PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+# PATCHCHECK_END = HEAD~2
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+# You can specify a git bisect if the BUILD_DIR is a git repository.
+# The MIN_CONFIG will be used for all builds of the bisect. The build type
+# used for bisecting is oldconfig.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+# If an operation failed in the bisect that was not expected to
+# fail. Then the test ends. The state of the BUILD_DIR will be
+# left off at where the failure occurred. You can examine the
+# reason for the failure, and perhaps even find a git commit
+# that would work to continue with. You can run:
+#
+# git bisect log > /path/to/replay/file
+#
+# The adding:
+#
+# BISECT_REPLAY= /path/to/replay/file
+#
+# And running the test again. The test will perform the initial
+# git bisect start, git bisect good, and git bisect bad, and
+# then it will run git bisect replay on this file, before
+# continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+# As with BISECT_REPLAY, if the test failed on a commit that
+# just happen to have a bad commit in the middle of the bisect,
+# and you need to skip it. If BISECT_START is defined, it
+# will checkout that commit after doing the initial git bisect start,
+# git bisect good, git bisect bad, and running the git bisect replay
+# if the BISECT_REPLAY is set.
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+# In those strange instances where it was broken forever
+# and you are trying to find where it started to work!
+# Set BISECT_GOOD to the commit that was last known to fail
+# Set BISECT_BAD to the commit that is known to start working.
+# With BISECT_REVERSE = 1, The test will consider failures as
+# good, and success as bad.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+# Just to be sure the good is good and bad is bad, setting
+# BISECT_CHECK to 1 will start the bisect by first checking
+# out BISECT_BAD and makes sure it fails, then it will check
+# out BISECT_GOOD and makes sure it succeeds before starting
+# the bisect (it works for BISECT_REVERSE too).
+#
+# You can limit the test to just check BISECT_GOOD or
+# BISECT_BAD with BISECT_CHECK = good or
+# BISECT_CHECK = bad, respectively.
+#
+# Example:
+# TEST_START
+# TEST_TYPE = bisect
+# BISECT_GOOD = v2.6.36
+# BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+# BISECT_TYPE = build
+# MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+# In those cases that you have two different configs. One of them
+# work, the other does not, and you do not know what config causes
+# the problem.
+# The TEST_TYPE config_bisect will bisect the bad config looking for
+# what config causes the failure.
+#
+# The way it works is this:
+#
+# First it finds a config to work with. Since a different version, or
+# MIN_CONFIG may cause different dependecies, it must run through this
+# preparation.
+#
+# Overwrites any config set in the bad config with a config set in
+# either the MIN_CONFIG or ADD_CONFIG. Thus, make sure these configs
+# are minimal and do not disable configs you want to test:
+# (ie. # CONFIG_FOO is not set).
+#
+# An oldconfig is run on the bad config and any new config that
+# appears will be added to the configs to test.
+#
+# Finally, it generates a config with the above result and runs it
+# again through make oldconfig to produce a config that should be
+# satisfied by kconfig.
+#
+# Then it starts the bisect.
+#
+# The configs to test are cut in half. If all the configs in this
+# half depend on a config in the other half, then the other half
+# is tested instead. If no configs are enabled by either half, then
+# this means a circular dependency exists and the test fails.
+#
+# A config is created with the test half, and the bisect test is run.
+#
+# If the bisect succeeds, then all configs in the generated config
+# are removed from the configs to test and added to the configs that
+# will be enabled for all builds (they will be enabled, but not be part
+# of the configs to examine).
+#
+# If the bisect fails, then all test configs that were not enabled by
+# the config file are removed from the test. These configs will not
+# be enabled in future tests. Since current config failed, we consider
+# this to be a subset of the config that we started with.
+#
+# When we are down to one config, it is considered the bad config.
+#
+# Note, the config chosen may not be the true bad config. Due to
+# dependencies and selections of the kbuild system, mulitple
+# configs may be needed to cause a failure. If you disable the
+# config that was found and restart the test, if the test fails
+# again, it is recommended to rerun the config_bisect with a new
+# bad config without the found config enabled.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# CONFIG_BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# CONFIG_BISECT is the config that failed to boot
+#
+# Example:
+# TEST_START
+# TEST_TYPE = config_bisect
+# CONFIG_BISECT_TYPE = build
+# CONFIG_BISECT = /home/test/˘onfig-bad
+# MIN_CONFIG = /home/test/config-min
+#
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
new file mode 100644
index 00000000000..d1d442ed106
--- /dev/null
+++ b/tools/virtio/Makefile
@@ -0,0 +1,12 @@
+all: test mod
+test: virtio_test
+virtio_test: virtio_ring.o virtio_test.o
+CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD
+vpath %.c ../../drivers/virtio
+mod:
+ ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
+.PHONY: all test mod clean
+clean:
+ ${RM} *.o vhost_test/*.o vhost_test/.*.cmd \
+ vhost_test/Module.symvers vhost_test/modules.order *.d
+-include *.d
diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h
new file mode 100644
index 00000000000..4ad7e1df0db
--- /dev/null
+++ b/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
new file mode 100644
index 00000000000..81baeac8ae4
--- /dev/null
+++ b/tools/virtio/linux/slab.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_SLAB_H
+#endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
new file mode 100644
index 00000000000..669bcdd4580
--- /dev/null
+++ b/tools/virtio/linux/virtio.h
@@ -0,0 +1,223 @@
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <linux/types.h>
+#include <errno.h>
+
+typedef unsigned long long dma_addr_t;
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+};
+
+struct page {
+ unsigned long long dummy;
+};
+
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)p)
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
+ sizeof(struct page)))
+#define offset_in_page(p) (((unsigned long)p) % 4096)
+#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \
+ sg->offset)
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+ /*
+ * Set termination bit, clear potential chain bit
+ */
+ sg->page_link |= 0x02;
+ sg->page_link &= ~0x01;
+}
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+ sg_mark_end(&sgl[nents - 1]);
+}
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+ unsigned long page_link = sg->page_link & 0x3;
+
+ /*
+ * In order for the low bit stealing approach to work, pages
+ * must be aligned at a 32-bit boundary as a minimum.
+ */
+ BUG_ON((unsigned long) page & 0x03);
+ sg->page_link = page_link | (unsigned long) page;
+}
+
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ sg_assign_page(sg, page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+
+typedef __u16 u16;
+
+typedef enum {
+ GFP_KERNEL,
+ GFP_ATOMIC,
+} gfp_t;
+typedef enum {
+ IRQ_NONE,
+ IRQ_HANDLED
+} irqreturn_t;
+
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+ return malloc(s);
+}
+
+static inline void kfree(void *p)
+{
+ free(p);
+}
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define uninitialized_var(x) x = x
+
+# ifndef likely
+# define likely(x) (__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+# define unlikely(x) (__builtin_expect(!!(x), 0))
+# endif
+
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+/* TODO: Not atomic as it should be:
+ * we don't use this for anything important. */
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p &= ~mask;
+}
+
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/* The only feature we care to support */
+#define virtio_has_feature(dev, feature) \
+ test_bit((feature), (dev)->features)
+/* end of stubs */
+
+struct virtio_device {
+ void *dev;
+ unsigned long features[1];
+};
+
+struct virtqueue {
+ /* TODO: commented as list macros are empty stubs for now.
+ * Broken but enough for virtio_ring.c
+ * struct list_head list; */
+ void (*callback)(struct virtqueue *vq);
+ const char *name;
+ struct virtio_device *vdev;
+ void *priv;
+};
+
+#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
+ void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
+}
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+ const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
+
+#define CONFIG_SMP
+
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define mb() __sync_synchronize()
+
+#define smp_mb() mb()
+# define smp_rmb() barrier()
+# define smp_wmb() barrier()
+#else
+#error Please fill in barrier macros
+#endif
+
+/* Interfaces exported by virtio_ring. */
+int virtqueue_add_buf_gfp(struct virtqueue *vq,
+ struct scatterlist sg[],
+ unsigned int out_num,
+ unsigned int in_num,
+ void *data,
+ gfp_t gfp);
+
+static inline int virtqueue_add_buf(struct virtqueue *vq,
+ struct scatterlist sg[],
+ unsigned int out_num,
+ unsigned int in_num,
+ void *data)
+{
+ return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
+}
+
+void virtqueue_kick(struct virtqueue *vq);
+
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+
+void virtqueue_disable_cb(struct virtqueue *vq);
+
+bool virtqueue_enable_cb(struct virtqueue *vq);
+
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+struct virtqueue *vring_new_virtqueue(unsigned int num,
+ unsigned int vring_align,
+ struct virtio_device *vdev,
+ void *pages,
+ void (*notify)(struct virtqueue *vq),
+ void (*callback)(struct virtqueue *vq),
+ const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+
+#endif
diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile
new file mode 100644
index 00000000000..a1d35b81b31
--- /dev/null
+++ b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 00000000000..18735189e62
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 00000000000..df0c6d2c386
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,248 @@
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+struct vq_info {
+ int kick;
+ int call;
+ int num;
+ int idx;
+ void *ring;
+ /* copy used for control */
+ struct vring vring;
+ struct virtqueue *vq;
+};
+
+struct vdev_info {
+ struct virtio_device vdev;
+ int control;
+ struct pollfd fds[1];
+ struct vq_info vqs[1];
+ int nvqs;
+ void *buf;
+ size_t buf_size;
+ struct vhost_memory *mem;
+};
+
+void vq_notify(struct virtqueue *vq)
+{
+ struct vq_info *info = vq->priv;
+ unsigned long long v = 1;
+ int r;
+ r = write(info->kick, &v, sizeof v);
+ assert(r == sizeof v);
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+ struct vhost_vring_state state = { .index = info->idx };
+ struct vhost_vring_file file = { .index = info->idx };
+ unsigned long long features = dev->vdev.features[0];
+ struct vhost_vring_addr addr = {
+ .index = info->idx,
+ .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+ .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+ .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+ };
+ int r;
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ assert(r >= 0);
+ state.num = info->vring.num;
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ assert(r >= 0);
+ state.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ assert(r >= 0);
+ r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ assert(r >= 0);
+ file.fd = info->kick;
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ assert(r >= 0);
+ file.fd = info->call;
+ r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ assert(r >= 0);
+}
+
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+ struct vq_info *info = &dev->vqs[dev->nvqs];
+ int r;
+ info->idx = dev->nvqs;
+ info->kick = eventfd(0, EFD_NONBLOCK);
+ info->call = eventfd(0, EFD_NONBLOCK);
+ r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+ assert(r >= 0);
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+ vq_notify, vq_callback, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+ vhost_vq_setup(dev, info);
+ dev->fds[info->idx].fd = info->call;
+ dev->fds[info->idx].events = POLLIN;
+ dev->nvqs++;
+}
+
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+ int r;
+ memset(dev, 0, sizeof *dev);
+ dev->vdev.features[0] = features;
+ dev->vdev.features[1] = features >> 32;
+ dev->buf_size = 1024;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+ dev->control = open("/dev/vhost-test", O_RDWR);
+ assert(dev->control >= 0);
+ r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+ assert(r >= 0);
+ dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ assert(dev->mem);
+ memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ dev->mem->nregions = 1;
+ dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+ dev->mem->regions[0].userspace_addr = (long)dev->buf;
+ dev->mem->regions[0].memory_size = dev->buf_size;
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+}
+
+/* TODO: this is pretty bad: we get a cache line bounce
+ * for the wait queue on poll and another one on read,
+ * plus the read which is there just to clear the
+ * current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+ int i;
+ unsigned long long val;
+ poll(dev->fds, dev->nvqs, -1);
+ for (i = 0; i < dev->nvqs; ++i)
+ if (dev->fds[i].revents & POLLIN) {
+ read(dev->fds[i].fd, &val, sizeof val);
+ }
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
+{
+ struct scatterlist sl;
+ long started = 0, completed = 0;
+ long completed_before;
+ int r, test = 1;
+ unsigned len;
+ long long spurious = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ for (;;) {
+ virtqueue_disable_cb(vq->vq);
+ completed_before = completed;
+ do {
+ if (started < bufs) {
+ sg_init_one(&sl, dev->buf, dev->buf_size);
+ r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
+ dev->buf + started);
+ if (likely(r >= 0)) {
+ ++started;
+ virtqueue_kick(vq->vq);
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (virtqueue_get_buf(vq->vq, &len)) {
+ ++completed;
+ r = 0;
+ }
+
+ } while (r >= 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (completed == bufs)
+ break;
+ if (virtqueue_enable_cb(vq->vq)) {
+ wait_for_interrupt(dev);
+ }
+ }
+ test = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+ {
+ .name = "help",
+ .val = 'h',
+ },
+ {
+ .name = "indirect",
+ .val = 'I',
+ },
+ {
+ .name = "no-indirect",
+ .val = 'i',
+ },
+ {
+ }
+};
+
+static void help()
+{
+ fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct vdev_info dev;
+ unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+ int o;
+
+ for (;;) {
+ o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'h':
+ help();
+ goto done;
+ case 'i':
+ features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+done:
+ vdev_info_init(&dev, features);
+ vq_info_add(&dev, 256);
+ run_test(&dev, &dev.vqs[0], 0x100000);
+ return 0;
+}