diff options
Diffstat (limited to 'driver/gator_events_perf_pmu.c')
-rw-r--r-- | driver/gator_events_perf_pmu.c | 520 |
1 files changed, 361 insertions, 159 deletions
diff --git a/driver/gator_events_perf_pmu.c b/driver/gator_events_perf_pmu.c index ce3a40f..34a6bc7 100644 --- a/driver/gator_events_perf_pmu.c +++ b/driver/gator_events_perf_pmu.c @@ -1,5 +1,5 @@ /** - * Copyright (C) ARM Limited 2010-2012. All rights reserved. + * Copyright (C) ARM Limited 2010-2013. All rights reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,49 +13,74 @@ // gator_events_armvX.c is used for Linux 2.6.x #if GATOR_PERF_PMU_SUPPORT -static const char *pmnc_name; -int pmnc_counters; -int ccnt = 0; +extern bool event_based_sampling; -#define CNTMAX (6+1) +#define CNTMAX 16 +#define CCI_400 4 +// + 1 for the cci-400 cycles counter +#define UCCNT (CCI_400 + 1) -static DEFINE_MUTEX(perf_mutex); +struct gator_attr { + char name[40]; + unsigned long enabled; + unsigned long type; + unsigned long event; + unsigned long count; + unsigned long key; +}; -unsigned long pmnc_enabled[CNTMAX]; -unsigned long pmnc_event[CNTMAX]; -unsigned long pmnc_count[CNTMAX]; -unsigned long pmnc_key[CNTMAX]; +static struct gator_attr attrs[CNTMAX]; +static int attr_count; +static struct gator_attr uc_attrs[UCCNT]; +static int uc_attr_count; + +struct gator_event { + int curr; + int prev; + int prev_delta; + bool zero; + struct perf_event *pevent; + struct perf_event_attr *pevent_attr; +}; -static DEFINE_PER_CPU(int[CNTMAX], perfCurr); -static DEFINE_PER_CPU(int[CNTMAX], perfPrev); -static DEFINE_PER_CPU(int[CNTMAX], perfPrevDelta); -static DEFINE_PER_CPU(int[CNTMAX * 2], perfCnt); -static DEFINE_PER_CPU(struct perf_event *[CNTMAX], pevent); -static DEFINE_PER_CPU(struct perf_event_attr *[CNTMAX], pevent_attr); +static DEFINE_PER_CPU(struct gator_event[CNTMAX], events); +static struct gator_event uc_events[UCCNT]; +static DEFINE_PER_CPU(int[(CNTMAX + UCCNT)*2], perf_cnt); static void gator_events_perf_pmu_stop(void); -static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root) +static int __create_files(struct super_block *sb, struct dentry *root, struct gator_attr *const attr) { struct dentry *dir; - int i; - for (i = 0; i < pmnc_counters; i++) { - char buf[40]; - if (i == 0) { - snprintf(buf, sizeof buf, "%s_ccnt", pmnc_name); - } else { - snprintf(buf, sizeof buf, "%s_cnt%d", pmnc_name, i - 1); - } - dir = gatorfs_mkdir(sb, root, buf); - if (!dir) { + if (attr->name[0] == '\0') { + return 0; + } + dir = gatorfs_mkdir(sb, root, attr->name); + if (!dir) { + return -1; + } + gatorfs_create_ulong(sb, dir, "enabled", &attr->enabled); + gatorfs_create_ulong(sb, dir, "count", &attr->count); + gatorfs_create_ro_ulong(sb, dir, "key", &attr->key); + gatorfs_create_ulong(sb, dir, "event", &attr->event); + + return 0; +} + +static int gator_events_perf_pmu_create_files(struct super_block *sb, struct dentry *root) +{ + int cnt; + + for (cnt = 0; cnt < attr_count; cnt++) { + if (__create_files(sb, root, &attrs[cnt]) != 0) { return -1; } - gatorfs_create_ulong(sb, dir, "enabled", &pmnc_enabled[i]); - gatorfs_create_ulong(sb, dir, "count", &pmnc_count[i]); - gatorfs_create_ro_ulong(sb, dir, "key", &pmnc_key[i]); - if (i > 0) { - gatorfs_create_ulong(sb, dir, "event", &pmnc_event[i]); + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__create_files(sb, root, &uc_attrs[cnt]) != 0) { + return -1; } } @@ -80,177 +105,268 @@ static void dummy_handler(struct perf_event *event, struct perf_sample_data *dat // Required as perf_event_create_kernel_counter() requires an overflow handler, even though all we do is poll } -static int gator_events_perf_pmu_online(int **buffer) +static int gator_events_perf_pmu_read(int **buffer); + +static int gator_events_perf_pmu_online(int **buffer, bool migrate) { - int cnt, len = 0, cpu = smp_processor_id(); + return gator_events_perf_pmu_read(buffer); +} - // read the counters and toss the invalid data, return zero instead - for (cnt = 0; cnt < pmnc_counters; cnt++) { - struct perf_event *ev = per_cpu(pevent, cpu)[cnt]; - if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) { - ev->pmu->read(ev); - per_cpu(perfPrev, cpu)[cnt] = per_cpu(perfCurr, cpu)[cnt] = local64_read(&ev->count); - per_cpu(perfPrevDelta, cpu)[cnt] = 0; - per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; - per_cpu(perfCnt, cpu)[len++] = 0; - } +static void __online_dispatch(int cpu, bool migrate, struct gator_attr *const attr, struct gator_event *const event) +{ + perf_overflow_handler_t handler; + + event->zero = true; + + if (event->pevent != NULL || event->pevent_attr == 0 || migrate) { + return; } - if (buffer) - *buffer = per_cpu(perfCnt, cpu); + if (attr->count > 0) { + handler = ebs_overflow_handler; + } else { + handler = dummy_handler; + } - return len; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler); +#else + event->pevent = perf_event_create_kernel_counter(event->pevent_attr, cpu, 0, handler, 0); +#endif + if (IS_ERR(event->pevent)) { + pr_debug("gator: unable to online a counter on cpu %d\n", cpu); + event->pevent = NULL; + return; + } + + if (event->pevent->state != PERF_EVENT_STATE_ACTIVE) { + pr_debug("gator: inactive counter on cpu %d\n", cpu); + perf_event_release_kernel(event->pevent); + event->pevent = NULL; + return; + } } -static void gator_events_perf_pmu_online_dispatch(int cpu) +static void gator_events_perf_pmu_online_dispatch(int cpu, bool migrate) { int cnt; - perf_overflow_handler_t handler; - for (cnt = 0; cnt < pmnc_counters; cnt++) { - if (per_cpu(pevent, cpu)[cnt] != NULL || per_cpu(pevent_attr, cpu)[cnt] == 0) - continue; + cpu = pcpu_to_lcpu(cpu); - if (pmnc_count[cnt] > 0) { - handler = ebs_overflow_handler; - } else { - handler = dummy_handler; - } + for (cnt = 0; cnt < attr_count; cnt++) { + __online_dispatch(cpu, migrate, &attrs[cnt], &per_cpu(events, cpu)[cnt]); + } -#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) - per_cpu(pevent, cpu)[cnt] = perf_event_create_kernel_counter(per_cpu(pevent_attr, cpu)[cnt], cpu, 0, handler); -#else - per_cpu(pevent, cpu)[cnt] = perf_event_create_kernel_counter(per_cpu(pevent_attr, cpu)[cnt], cpu, 0, handler, 0); -#endif - if (IS_ERR(per_cpu(pevent, cpu)[cnt])) { - pr_debug("gator: unable to online a counter on cpu %d\n", cpu); - per_cpu(pevent, cpu)[cnt] = NULL; - continue; + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __online_dispatch(cpu, migrate, &uc_attrs[cnt], &uc_events[cnt]); } + } +} - if (per_cpu(pevent, cpu)[cnt]->state != PERF_EVENT_STATE_ACTIVE) { - pr_debug("gator: inactive counter on cpu %d\n", cpu); - perf_event_release_kernel(per_cpu(pevent, cpu)[cnt]); - per_cpu(pevent, cpu)[cnt] = NULL; - continue; - } +static void __offline_dispatch(int cpu, struct gator_event *const event) +{ + struct perf_event *pe = NULL; + + if (event->pevent) { + pe = event->pevent; + event->pevent = NULL; + } + + if (pe) { + perf_event_release_kernel(pe); } } -static void gator_events_perf_pmu_offline_dispatch(int cpu) +static void gator_events_perf_pmu_offline_dispatch(int cpu, bool migrate) { int cnt; - struct perf_event *pe; - for (cnt = 0; cnt < pmnc_counters; cnt++) { - pe = NULL; - mutex_lock(&perf_mutex); - if (per_cpu(pevent, cpu)[cnt]) { - pe = per_cpu(pevent, cpu)[cnt]; - per_cpu(pevent, cpu)[cnt] = NULL; + if (migrate) { + return; + } + cpu = pcpu_to_lcpu(cpu); + + for (cnt = 0; cnt < attr_count; cnt++) { + __offline_dispatch(cpu, &per_cpu(events, cpu)[cnt]); + } + + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __offline_dispatch(cpu, &uc_events[cnt]); } - mutex_unlock(&perf_mutex); + } +} - if (pe) { - perf_event_release_kernel(pe); +static int __check_ebs(struct gator_attr *const attr) +{ + if (attr->count > 0) { + if (!event_based_sampling) { + event_based_sampling = true; + } else { + printk(KERN_WARNING "gator: Only one ebs counter is allowed\n"); + return -1; } } + + return 0; +} + +static int __start(struct gator_attr *const attr, struct gator_event *const event) +{ + u32 size = sizeof(struct perf_event_attr); + + event->pevent = NULL; + if (!attr->enabled) { // Skip disabled counters + return 0; + } + + event->prev = 0; + event->curr = 0; + event->prev_delta = 0; + event->pevent_attr = kmalloc(size, GFP_KERNEL); + if (!event->pevent_attr) { + gator_events_perf_pmu_stop(); + return -1; + } + + memset(event->pevent_attr, 0, size); + event->pevent_attr->type = attr->type; + event->pevent_attr->size = size; + event->pevent_attr->config = attr->event; + event->pevent_attr->sample_period = attr->count; + event->pevent_attr->pinned = 1; + + return 0; } static int gator_events_perf_pmu_start(void) { int cnt, cpu; - u32 size = sizeof(struct perf_event_attr); - int found_ebs = false; - - for (cnt = 0; cnt < pmnc_counters; cnt++) { - if (pmnc_count[cnt] > 0) { - if (!found_ebs) { - found_ebs = true; - } else { - // Only one ebs counter is allowed - return -1; - } + + event_based_sampling = false; + for (cnt = 0; cnt < attr_count; cnt++) { + if (__check_ebs(&attrs[cnt]) != 0) { + return -1; + } + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__check_ebs(&uc_attrs[cnt]) != 0) { + return -1; } } for_each_present_cpu(cpu) { - for (cnt = 0; cnt < pmnc_counters; cnt++) { - per_cpu(pevent, cpu)[cnt] = NULL; - if (!pmnc_enabled[cnt]) // Skip disabled counters - continue; - - per_cpu(perfPrev, cpu)[cnt] = 0; - per_cpu(perfCurr, cpu)[cnt] = 0; - per_cpu(perfPrevDelta, cpu)[cnt] = 0; - per_cpu(pevent_attr, cpu)[cnt] = kmalloc(size, GFP_KERNEL); - if (!per_cpu(pevent_attr, cpu)[cnt]) { - gator_events_perf_pmu_stop(); + for (cnt = 0; cnt < attr_count; cnt++) { + if (__start(&attrs[cnt], &per_cpu(events, cpu)[cnt]) != 0) { return -1; } + } + } - memset(per_cpu(pevent_attr, cpu)[cnt], 0, size); - per_cpu(pevent_attr, cpu)[cnt]->type = PERF_TYPE_RAW; - per_cpu(pevent_attr, cpu)[cnt]->size = size; - per_cpu(pevent_attr, cpu)[cnt]->config = pmnc_event[cnt]; - per_cpu(pevent_attr, cpu)[cnt]->sample_period = pmnc_count[cnt]; - per_cpu(pevent_attr, cpu)[cnt]->pinned = 1; - - // handle special case for ccnt - if (cnt == ccnt) { - per_cpu(pevent_attr, cpu)[cnt]->type = PERF_TYPE_HARDWARE; - per_cpu(pevent_attr, cpu)[cnt]->config = PERF_COUNT_HW_CPU_CYCLES; - } + for (cnt = 0; cnt < uc_attr_count; cnt++) { + if (__start(&uc_attrs[cnt], &uc_events[cnt]) != 0) { + return -1; } } return 0; } +static void __event_stop(struct gator_event *const event) +{ + if (event->pevent_attr) { + kfree(event->pevent_attr); + event->pevent_attr = NULL; + } +} + +static void __attr_stop(struct gator_attr *const attr) +{ + attr->enabled = 0; + attr->event = 0; + attr->count = 0; +} + static void gator_events_perf_pmu_stop(void) { unsigned int cnt, cpu; for_each_present_cpu(cpu) { - for (cnt = 0; cnt < pmnc_counters; cnt++) { - if (per_cpu(pevent_attr, cpu)[cnt]) { - kfree(per_cpu(pevent_attr, cpu)[cnt]); - per_cpu(pevent_attr, cpu)[cnt] = NULL; - } + for (cnt = 0; cnt < attr_count; cnt++) { + __event_stop(&per_cpu(events, cpu)[cnt]); } } - for (cnt = 0; cnt < pmnc_counters; cnt++) { - pmnc_enabled[cnt] = 0; - pmnc_event[cnt] = 0; - pmnc_count[cnt] = 0; + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __event_stop(&uc_events[cnt]); + } + + for (cnt = 0; cnt < attr_count; cnt++) { + __attr_stop(&attrs[cnt]); + } + + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __attr_stop(&uc_attrs[cnt]); } } -static int gator_events_perf_pmu_read(int **buffer) +static void __read(int *const len, int cpu, struct gator_attr *const attr, struct gator_event *const event) { - int cnt, delta, len = 0; - int cpu = smp_processor_id(); - - for (cnt = 0; cnt < pmnc_counters; cnt++) { - struct perf_event *ev = per_cpu(pevent, cpu)[cnt]; - if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) { + int delta; + + struct perf_event *const ev = event->pevent; + if (ev != NULL && ev->state == PERF_EVENT_STATE_ACTIVE) { + /* After creating the perf counter in __online_dispatch, there + * is a race condition between gator_events_perf_pmu_online and + * gator_events_perf_pmu_read. So have + * gator_events_perf_pmu_online call gator_events_perf_pmu_read + * and in __read check to see if it's the first call after + * __online_dispatch and if so, run the online code. + */ + if (event->zero) { + ev->pmu->read(ev); + event->prev = event->curr = local64_read(&ev->count); + event->prev_delta = 0; + per_cpu(perf_cnt, cpu)[(*len)++] = attr->key; + per_cpu(perf_cnt, cpu)[(*len)++] = 0; + event->zero = false; + } else { ev->pmu->read(ev); - per_cpu(perfCurr, cpu)[cnt] = local64_read(&ev->count); - delta = per_cpu(perfCurr, cpu)[cnt] - per_cpu(perfPrev, cpu)[cnt]; - if (delta != 0 || delta != per_cpu(perfPrevDelta, cpu)[cnt]) { - per_cpu(perfPrevDelta, cpu)[cnt] = delta; - per_cpu(perfPrev, cpu)[cnt] = per_cpu(perfCurr, cpu)[cnt]; - per_cpu(perfCnt, cpu)[len++] = pmnc_key[cnt]; - if (delta < 0) + event->curr = local64_read(&ev->count); + delta = event->curr - event->prev; + if (delta != 0 || delta != event->prev_delta) { + event->prev_delta = delta; + event->prev = event->curr; + per_cpu(perf_cnt, cpu)[(*len)++] = attr->key; + if (delta < 0) { delta *= -1; - per_cpu(perfCnt, cpu)[len++] = delta; + } + per_cpu(perf_cnt, cpu)[(*len)++] = delta; } } } +} - if (buffer) - *buffer = per_cpu(perfCnt, cpu); +static int gator_events_perf_pmu_read(int **buffer) +{ + int cnt, len = 0; + const int cpu = get_logical_cpu(); + + for (cnt = 0; cnt < attr_count; cnt++) { + __read(&len, cpu, &attrs[cnt], &per_cpu(events, cpu)[cnt]); + } + + if (cpu == 0) { + for (cnt = 0; cnt < uc_attr_count; cnt++) { + __read(&len, cpu, &uc_attrs[cnt], &uc_events[cnt]); + } + } + + if (buffer) { + *buffer = per_cpu(perf_cnt, cpu); + } return len; } @@ -265,30 +381,116 @@ static struct gator_interface gator_events_perf_pmu_interface = { .read = gator_events_perf_pmu_read, }; +static void __attr_init(struct gator_attr *const attr) +{ + attr->name[0] = '\0'; + attr->enabled = 0; + attr->type = 0; + attr->event = 0; + attr->count = 0; + attr->key = gator_events_get_key(); +} + +static void gator_events_perf_pmu_cci_init(const int type) +{ + int cnt; + + strncpy(uc_attrs[uc_attr_count].name, "cci-400_ccnt", sizeof(uc_attrs[uc_attr_count].name)); + uc_attrs[uc_attr_count].type = type; + ++uc_attr_count; + + for (cnt = 0; cnt < CCI_400; ++cnt, ++uc_attr_count) { + struct gator_attr *const attr = &uc_attrs[uc_attr_count]; + snprintf(attr->name, sizeof(attr->name), "cci-400_cnt%d", cnt); + attr->type = type; + } +} + +static void gator_events_perf_pmu_cpu_init(const struct gator_cpu *const gator_cpu, const int type) +{ + int cnt; + + snprintf(attrs[attr_count].name, sizeof(attrs[attr_count].name), "%s_ccnt", gator_cpu->pmnc_name); + attrs[attr_count].type = type; + ++attr_count; + + for (cnt = 0; cnt < gator_cpu->pmnc_counters; ++cnt, ++attr_count) { + struct gator_attr *const attr = &attrs[attr_count]; + snprintf(attr->name, sizeof(attr->name), "%s_cnt%d", gator_cpu->pmnc_name, cnt); + attr->type = type; + } +} + int gator_events_perf_pmu_init(void) { - unsigned int cnt; - const u32 cpuid = gator_cpuid(); - - for (cnt = 0; gator_cpus[cnt].cpuid != 0; ++cnt) { - if (gator_cpus[cnt].cpuid == cpuid) { - pmnc_name = gator_cpus[cnt].pmnc_name; - pmnc_counters = gator_cpus[cnt].pmnc_counters; - ccnt = gator_cpus[cnt].ccnt; + struct perf_event_attr pea; + struct perf_event *pe; + const struct gator_cpu *gator_cpu; + int type; + int cpu; + int cnt; + bool found_cpu = false; + + for (cnt = 0; cnt < CNTMAX; cnt++) { + __attr_init(&attrs[cnt]); + } + for (cnt = 0; cnt < UCCNT; cnt++) { + __attr_init(&uc_attrs[cnt]); + } + + memset(&pea, 0, sizeof(pea)); + pea.size = sizeof(pea); + pea.config = 0xFF; + attr_count = 0; + uc_attr_count = 0; + for (type = PERF_TYPE_MAX; type < 0x20; ++type) { + pea.type = type; + + // A particular PMU may work on some but not all cores, so try on each core + pe = NULL; + for_each_present_cpu(cpu) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler); +#else + pe = perf_event_create_kernel_counter(&pea, cpu, 0, dummy_handler, 0); +#endif + if (!IS_ERR(pe)) { + break; + } + } + // Assume that valid PMUs are contigious + if (IS_ERR(pe)) { break; } + + if (pe->pmu != NULL && type == pe->pmu->type) { + if (strcmp("CCI", pe->pmu->name) == 0) { + gator_events_perf_pmu_cci_init(type); + } else if ((gator_cpu = gator_find_cpu_by_pmu_name(pe->pmu->name)) != NULL) { + found_cpu = true; + gator_events_perf_pmu_cpu_init(gator_cpu, type); + } + } + + perf_event_release_kernel(pe); } - if (gator_cpus[cnt].cpuid == 0) { - return -1; + + if (!found_cpu) { + const struct gator_cpu *const gator_cpu = gator_find_cpu_by_cpuid(gator_cpuid()); + if (gator_cpu == NULL) { + return -1; + } + gator_events_perf_pmu_cpu_init(gator_cpu, PERF_TYPE_RAW); } - pmnc_counters++; // CNT[n] + CCNT + if (attr_count > CNTMAX) { + printk(KERN_ERR "gator: Too many perf counters\n"); + return -1; + } - for (cnt = 0; cnt < CNTMAX; cnt++) { - pmnc_enabled[cnt] = 0; - pmnc_event[cnt] = 0; - pmnc_count[cnt] = 0; - pmnc_key[cnt] = gator_events_get_key(); + if (uc_attr_count > UCCNT) { + printk(KERN_ERR "gator: Too many perf uncore counters\n"); + return -1; } return gator_events_install(&gator_events_perf_pmu_interface); |