aboutsummaryrefslogtreecommitdiff
path: root/patches
diff options
context:
space:
mode:
authorArjan van de Ven <arjan@linux.intel.com>2010-11-26 12:50:39 -0800
committerArjan van de Ven <arjan@linux.intel.com>2010-11-26 12:50:39 -0800
commit89859fd990c8f73e246b83f2405ef7bc5250049b (patch)
tree275305e5f9716f2acce8a7f712e37a31745ceacd /patches
parentda4a519d692c45aca57e2b31bed369e7ff7be535 (diff)
adding inode dirty tracing....
Diffstat (limited to 'patches')
-rw-r--r--patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch302
-rw-r--r--patches/linux-2.6.37-rc3-vfs-dirty-inode.patch105
2 files changed, 407 insertions, 0 deletions
diff --git a/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch b/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch
new file mode 100644
index 0000000..bd451e2
--- /dev/null
+++ b/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch
@@ -0,0 +1,302 @@
+From: Arjan van de Ven <arjan@linux.intel.com>
+Subject: [PATCH] libata: Add ALPM power state accounting to the AHCI driver
+
+PowerTOP wants to be able to show the user how effective the ALPM link
+power management is for the user. ALPM is worth around 0.5W on a quiet
+link; PowerTOP wants to be able to find cases where the "quiet link" isn't
+actually quiet.
+
+This patch adds state accounting functionality to the AHCI driver for
+PowerTOP to use.
+The parts of the patch are
+1) the sysfs logic of exposing the stats for each state in sysfs
+2) the basic accounting logic that gets update on link change interrupts
+ (or when the user accesses the info from sysfs)
+3) a "accounting enable" flag; in order to get the accounting to work,
+ the driver needs to get phyrdy interrupts on link status changes.
+ Normally and currently this is disabled by the driver when ALPM is
+ on (to reduce overhead); when PowerTOP is running this will need
+ to be on to get usable statistics... hence the sysfs tunable.
+
+The PowerTOP output currently looks like this:
+
+Recent SATA AHCI link activity statistics
+Active Partial Slumber Device name
+ 0.5% 99.5% 0.0% host0
+
+(work to resolve "host0" to a more human readable name is in progress)
+
+Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
+
+---
+ drivers/ata/ahci.h | 15 ++++
+ drivers/ata/libahci.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++-
+ 2 files changed, 200 insertions(+), 2 deletions(-)
+
+Index: linux.trees.git/drivers/ata/ahci.h
+===================================================================
+--- linux.trees.git.orig/drivers/ata/ahci.h
++++ linux.trees.git/drivers/ata/ahci.h
+@@ -262,6 +262,13 @@ struct ahci_em_priv {
+ unsigned long led_state;
+ };
+
++enum ahci_port_states {
++ AHCI_PORT_NOLINK = 0,
++ AHCI_PORT_ACTIVE = 1,
++ AHCI_PORT_PARTIAL = 2,
++ AHCI_PORT_SLUMBER = 3
++};
++
+ struct ahci_port_priv {
+ struct ata_link *active_link;
+ struct ahci_cmd_hdr *cmd_slot;
+@@ -280,6 +287,14 @@ struct ahci_port_priv {
+ int fbs_last_dev; /* save FBS.DEV of last FIS */
+ /* enclosure management info per PM slot */
+ struct ahci_em_priv em_priv[EM_MAX_SLOTS];
++
++ /* ALPM accounting state and stats */
++ unsigned int accounting_active:1;
++ u64 active_jiffies;
++ u64 partial_jiffies;
++ u64 slumber_jiffies;
++ int previous_state;
++ int previous_jiffies;
+ };
+
+ struct ahci_host_priv {
+Index: linux.trees.git/drivers/ata/libahci.c
+===================================================================
+--- linux.trees.git.orig/drivers/ata/libahci.c
++++ linux.trees.git/drivers/ata/libahci.c
+@@ -58,6 +58,17 @@ MODULE_PARM_DESC(ignore_sss, "Ignore sta
+
+ static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
+ unsigned hints);
++static ssize_t ahci_alpm_show_active(struct device *dev,
++ struct device_attribute *attr, char *buf);
++static ssize_t ahci_alpm_show_slumber(struct device *dev,
++ struct device_attribute *attr, char *buf);
++static ssize_t ahci_alpm_show_partial(struct device *dev,
++ struct device_attribute *attr, char *buf);
++static ssize_t ahci_alpm_show_accounting(struct device *dev,
++ struct device_attribute *attr, char *buf);
++static ssize_t ahci_alpm_set_accounting(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t count);
+ static ssize_t ahci_led_show(struct ata_port *ap, char *buf);
+ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf,
+ size_t size);
+@@ -117,6 +128,12 @@ static DEVICE_ATTR(ahci_host_caps, S_IRU
+ static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL);
+ static DEVICE_ATTR(ahci_host_version, S_IRUGO, ahci_show_host_version, NULL);
+ static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL);
++static DEVICE_ATTR(ahci_alpm_active, S_IRUGO, ahci_alpm_show_active, NULL);
++static DEVICE_ATTR(ahci_alpm_partial, S_IRUGO, ahci_alpm_show_partial, NULL);
++static DEVICE_ATTR(ahci_alpm_slumber, S_IRUGO, ahci_alpm_show_slumber, NULL);
++static DEVICE_ATTR(ahci_alpm_accounting, S_IRUGO | S_IWUSR,
++ ahci_alpm_show_accounting, ahci_alpm_set_accounting);
++
+ static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO,
+ ahci_read_em_buffer, ahci_store_em_buffer);
+
+@@ -128,6 +145,10 @@ struct device_attribute *ahci_shost_attr
+ &dev_attr_ahci_host_cap2,
+ &dev_attr_ahci_host_version,
+ &dev_attr_ahci_port_cmd,
++ &dev_attr_ahci_alpm_active,
++ &dev_attr_ahci_alpm_partial,
++ &dev_attr_ahci_alpm_slumber,
++ &dev_attr_ahci_alpm_accounting,
+ &dev_attr_em_buffer,
+ NULL
+ };
+@@ -653,9 +674,14 @@ static int ahci_set_lpm(struct ata_link
+ * Disable interrupts on Phy Ready. This keeps us from
+ * getting woken up due to spurious phy ready
+ * interrupts.
++ *
++ * However, when accounting_active is set, we do want
++ * the interrupts for accounting purposes.
+ */
+- pp->intr_mask &= ~PORT_IRQ_PHYRDY;
+- writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
++ if (!pp->accounting_active) {
++ pp->intr_mask &= ~PORT_IRQ_PHYRDY;
++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
++ }
+
+ sata_link_scr_lpm(link, policy, false);
+ }
+@@ -1570,6 +1596,162 @@ static void ahci_error_intr(struct ata_p
+ ata_port_abort(ap);
+ }
+
++static int get_current_alpm_state(struct ata_port *ap)
++{
++ u32 status = 0;
++
++ ahci_scr_read(&ap->link, SCR_STATUS, &status);
++
++ /* link status is in bits 11-8 */
++ status = status >> 8;
++ status = status & 0x7;
++
++ if (status == 6)
++ return AHCI_PORT_SLUMBER;
++ if (status == 2)
++ return AHCI_PORT_PARTIAL;
++ if (status == 1)
++ return AHCI_PORT_ACTIVE;
++ return AHCI_PORT_NOLINK;
++}
++
++static void account_alpm_stats(struct ata_port *ap)
++{
++ struct ahci_port_priv *pp;
++
++ int new_state;
++ u64 new_jiffies, jiffies_delta;
++
++ if (ap == NULL)
++ return;
++ pp = ap->private_data;
++
++ if (!pp) return;
++
++ new_state = get_current_alpm_state(ap);
++ new_jiffies = jiffies;
++
++ jiffies_delta = new_jiffies - pp->previous_jiffies;
++
++ switch (pp->previous_state) {
++ case AHCI_PORT_NOLINK:
++ pp->active_jiffies = 0;
++ pp->partial_jiffies = 0;
++ pp->slumber_jiffies = 0;
++ break;
++ case AHCI_PORT_ACTIVE:
++ pp->active_jiffies += jiffies_delta;
++ break;
++ case AHCI_PORT_PARTIAL:
++ pp->partial_jiffies += jiffies_delta;
++ break;
++ case AHCI_PORT_SLUMBER:
++ pp->slumber_jiffies += jiffies_delta;
++ break;
++ default:
++ break;
++ }
++ pp->previous_state = new_state;
++ pp->previous_jiffies = new_jiffies;
++}
++
++static ssize_t ahci_alpm_show_active(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(dev);
++ struct ata_port *ap = ata_shost_to_port(shost);
++ struct ahci_port_priv *pp;
++
++ if (!ap || ata_port_is_dummy(ap))
++ return -EINVAL;
++ pp = ap->private_data;
++ account_alpm_stats(ap);
++
++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->active_jiffies));
++}
++
++static ssize_t ahci_alpm_show_partial(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(dev);
++ struct ata_port *ap = ata_shost_to_port(shost);
++ struct ahci_port_priv *pp;
++
++ if (!ap || ata_port_is_dummy(ap))
++ return -EINVAL;
++
++ pp = ap->private_data;
++ account_alpm_stats(ap);
++
++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->partial_jiffies));
++}
++
++static ssize_t ahci_alpm_show_slumber(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(dev);
++ struct ata_port *ap = ata_shost_to_port(shost);
++ struct ahci_port_priv *pp;
++
++ if (!ap || ata_port_is_dummy(ap))
++ return -EINVAL;
++
++ pp = ap->private_data;
++
++ account_alpm_stats(ap);
++
++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->slumber_jiffies));
++}
++
++static ssize_t ahci_alpm_show_accounting(struct device *dev,
++ struct device_attribute *attr, char *buf)
++{
++ struct Scsi_Host *shost = class_to_shost(dev);
++ struct ata_port *ap = ata_shost_to_port(shost);
++ struct ahci_port_priv *pp;
++
++ if (!ap || ata_port_is_dummy(ap))
++ return -EINVAL;
++
++ pp = ap->private_data;
++
++ return sprintf(buf, "%u\n", pp->accounting_active);
++}
++
++static ssize_t ahci_alpm_set_accounting(struct device *dev,
++ struct device_attribute *attr,
++ const char *buf, size_t count)
++{
++ unsigned long flags;
++ struct Scsi_Host *shost = class_to_shost(dev);
++ struct ata_port *ap = ata_shost_to_port(shost);
++ struct ahci_port_priv *pp;
++ void __iomem *port_mmio;
++
++ if (!ap || ata_port_is_dummy(ap))
++ return 1;
++
++ pp = ap->private_data;
++ port_mmio = ahci_port_base(ap);
++
++ if (!pp)
++ return 1;
++ if (buf[0] == '0')
++ pp->accounting_active = 0;
++ if (buf[0] == '1')
++ pp->accounting_active = 1;
++
++ /* we need to enable the PHYRDY interrupt when we want accounting */
++ if (pp->accounting_active) {
++ spin_lock_irqsave(ap->lock, flags);
++ pp->intr_mask |= PORT_IRQ_PHYRDY;
++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK);
++ spin_unlock_irqrestore(ap->lock, flags);
++ }
++ return count;
++}
++
++
+ static void ahci_port_intr(struct ata_port *ap)
+ {
+ void __iomem *port_mmio = ahci_port_base(ap);
+@@ -1590,6 +1772,7 @@ static void ahci_port_intr(struct ata_po
+ /* if LPM is enabled, PHYRDY doesn't mean anything */
+ if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) {
+ status &= ~PORT_IRQ_PHYRDY;
++ account_alpm_stats(ap);
+ ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG);
+ }
+
diff --git a/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch b/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch
new file mode 100644
index 0000000..8365f94
--- /dev/null
+++ b/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch
@@ -0,0 +1,105 @@
+From 880a17e27f0b90d32c0c0b9cf4cd9b4e5d779c73 Mon Sep 17 00:00:00 2001
+From: Arjan van de Ven <arjan@linux.intel.com>
+Date: Fri, 26 Nov 2010 12:18:03 -0800
+Subject: [PATCH] vfs: Add a trace point in the mark_inode_dirty function
+
+PowerTOP would like to be able to show who is keeping the disk
+busy by dirtying data. The most logical spot for this is in the vfs
+in the mark_inode_dirty() function, doing this on the block level
+is not possible because by the time the IO hits the block layer the
+guilty party can no longer be found ("kjournald" and "pdflush" are not
+useful answers to "who caused this file to be dirty).
+
+The trace point follows the same logic/style as the block_dump code
+and pretty much dumps the same data, just not to dmesg (and thus to
+/var/log/messages) but via the trace events streams.
+
+Eventually we should be able to phase out the block dump code, but that's
+for later on after a transition time.
+
+Signed-of-by: Arjan van de Ven <arjan@linux.intel.com>
+---
+ fs/fs-writeback.c | 4 ++++
+ fs/inode.c | 4 ++++
+ include/trace/events/vfs.h | 35 +++++++++++++++++++++++++++++++++++
+ 3 files changed, 43 insertions(+), 0 deletions(-)
+ create mode 100644 include/trace/events/vfs.h
+
+diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
+index 3d06ccc..6c93517 100644
+--- a/fs/fs-writeback.c
++++ b/fs/fs-writeback.c
+@@ -27,6 +27,7 @@
+ #include <linux/backing-dev.h>
+ #include <linux/buffer_head.h>
+ #include <linux/tracepoint.h>
++#include <trace/events/vfs.h>
+ #include "internal.h"
+
+ /*
+@@ -942,6 +943,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
+ sb->s_op->dirty_inode(inode);
+ }
+
++ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES))
++ trace_inode_dirty(inode);
++
+ /*
+ * make sure that changes are seen by all cpus before we test i_state
+ * -- mikulas
+diff --git a/fs/inode.c b/fs/inode.c
+index ae2727a..e8b1d42 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1708,3 +1708,7 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
+ inode->i_mode = mode;
+ }
+ EXPORT_SYMBOL(inode_init_owner);
++
++#define CREATE_TRACE_POINTS
++#include <trace/events/vfs.h>
++
+diff --git a/include/trace/events/vfs.h b/include/trace/events/vfs.h
+new file mode 100644
+index 0000000..7e5c135
+--- /dev/null
++++ b/include/trace/events/vfs.h
+@@ -0,0 +1,35 @@
++#undef TRACE_SYSTEM
++#define TRACE_SYSTEM vfs
++
++#if !defined(_TRACE_VFS_H) || defined(TRACE_HEADER_MULTI_READ)
++#define _TRACE_VFS_H
++
++/*
++ * Tracepoint for dirtying an inode:
++ */
++TRACE_EVENT(inode_dirty,
++
++ TP_PROTO(struct inode *inode),
++
++ TP_ARGS(inode),
++
++ TP_STRUCT__entry(
++ __field( int, dev_major )
++ __field( int, dev_minor )
++ __field( ino_t, ino )
++ ),
++
++ TP_fast_assign(
++ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
++ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
++ __entry->ino = inode->i_ino;
++ ),
++
++ TP_printk("dev %d,%d ino %lu ", __entry->dev_major, __entry->dev_minor,
++ (unsigned long) __entry->ino)
++);
++
++#endif /* _TRACE_VFS_H */
++
++/* This part must be outside protection */
++#include <trace/define_trace.h>
+--
+1.7.2.3
+