diff options
author | Arjan van de Ven <arjan@linux.intel.com> | 2010-11-26 12:50:39 -0800 |
---|---|---|
committer | Arjan van de Ven <arjan@linux.intel.com> | 2010-11-26 12:50:39 -0800 |
commit | 89859fd990c8f73e246b83f2405ef7bc5250049b (patch) | |
tree | 275305e5f9716f2acce8a7f712e37a31745ceacd /patches | |
parent | da4a519d692c45aca57e2b31bed369e7ff7be535 (diff) |
adding inode dirty tracing....
Diffstat (limited to 'patches')
-rw-r--r-- | patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch | 302 | ||||
-rw-r--r-- | patches/linux-2.6.37-rc3-vfs-dirty-inode.patch | 105 |
2 files changed, 407 insertions, 0 deletions
diff --git a/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch b/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch new file mode 100644 index 0000000..bd451e2 --- /dev/null +++ b/patches/linux-2.6.37-rc1-ahci-alpm-accounting.patch @@ -0,0 +1,302 @@ +From: Arjan van de Ven <arjan@linux.intel.com> +Subject: [PATCH] libata: Add ALPM power state accounting to the AHCI driver + +PowerTOP wants to be able to show the user how effective the ALPM link +power management is for the user. ALPM is worth around 0.5W on a quiet +link; PowerTOP wants to be able to find cases where the "quiet link" isn't +actually quiet. + +This patch adds state accounting functionality to the AHCI driver for +PowerTOP to use. +The parts of the patch are +1) the sysfs logic of exposing the stats for each state in sysfs +2) the basic accounting logic that gets update on link change interrupts + (or when the user accesses the info from sysfs) +3) a "accounting enable" flag; in order to get the accounting to work, + the driver needs to get phyrdy interrupts on link status changes. + Normally and currently this is disabled by the driver when ALPM is + on (to reduce overhead); when PowerTOP is running this will need + to be on to get usable statistics... hence the sysfs tunable. + +The PowerTOP output currently looks like this: + +Recent SATA AHCI link activity statistics +Active Partial Slumber Device name + 0.5% 99.5% 0.0% host0 + +(work to resolve "host0" to a more human readable name is in progress) + +Signed-off-by: Arjan van de Ven <arjan@linux.intel.com> + +--- + drivers/ata/ahci.h | 15 ++++ + drivers/ata/libahci.c | 187 +++++++++++++++++++++++++++++++++++++++++++++++++- + 2 files changed, 200 insertions(+), 2 deletions(-) + +Index: linux.trees.git/drivers/ata/ahci.h +=================================================================== +--- linux.trees.git.orig/drivers/ata/ahci.h ++++ linux.trees.git/drivers/ata/ahci.h +@@ -262,6 +262,13 @@ struct ahci_em_priv { + unsigned long led_state; + }; + ++enum ahci_port_states { ++ AHCI_PORT_NOLINK = 0, ++ AHCI_PORT_ACTIVE = 1, ++ AHCI_PORT_PARTIAL = 2, ++ AHCI_PORT_SLUMBER = 3 ++}; ++ + struct ahci_port_priv { + struct ata_link *active_link; + struct ahci_cmd_hdr *cmd_slot; +@@ -280,6 +287,14 @@ struct ahci_port_priv { + int fbs_last_dev; /* save FBS.DEV of last FIS */ + /* enclosure management info per PM slot */ + struct ahci_em_priv em_priv[EM_MAX_SLOTS]; ++ ++ /* ALPM accounting state and stats */ ++ unsigned int accounting_active:1; ++ u64 active_jiffies; ++ u64 partial_jiffies; ++ u64 slumber_jiffies; ++ int previous_state; ++ int previous_jiffies; + }; + + struct ahci_host_priv { +Index: linux.trees.git/drivers/ata/libahci.c +=================================================================== +--- linux.trees.git.orig/drivers/ata/libahci.c ++++ linux.trees.git/drivers/ata/libahci.c +@@ -58,6 +58,17 @@ MODULE_PARM_DESC(ignore_sss, "Ignore sta + + static int ahci_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, + unsigned hints); ++static ssize_t ahci_alpm_show_active(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_slumber(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_partial(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_show_accounting(struct device *dev, ++ struct device_attribute *attr, char *buf); ++static ssize_t ahci_alpm_set_accounting(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count); + static ssize_t ahci_led_show(struct ata_port *ap, char *buf); + static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, + size_t size); +@@ -117,6 +128,12 @@ static DEVICE_ATTR(ahci_host_caps, S_IRU + static DEVICE_ATTR(ahci_host_cap2, S_IRUGO, ahci_show_host_cap2, NULL); + static DEVICE_ATTR(ahci_host_version, S_IRUGO, ahci_show_host_version, NULL); + static DEVICE_ATTR(ahci_port_cmd, S_IRUGO, ahci_show_port_cmd, NULL); ++static DEVICE_ATTR(ahci_alpm_active, S_IRUGO, ahci_alpm_show_active, NULL); ++static DEVICE_ATTR(ahci_alpm_partial, S_IRUGO, ahci_alpm_show_partial, NULL); ++static DEVICE_ATTR(ahci_alpm_slumber, S_IRUGO, ahci_alpm_show_slumber, NULL); ++static DEVICE_ATTR(ahci_alpm_accounting, S_IRUGO | S_IWUSR, ++ ahci_alpm_show_accounting, ahci_alpm_set_accounting); ++ + static DEVICE_ATTR(em_buffer, S_IWUSR | S_IRUGO, + ahci_read_em_buffer, ahci_store_em_buffer); + +@@ -128,6 +145,10 @@ struct device_attribute *ahci_shost_attr + &dev_attr_ahci_host_cap2, + &dev_attr_ahci_host_version, + &dev_attr_ahci_port_cmd, ++ &dev_attr_ahci_alpm_active, ++ &dev_attr_ahci_alpm_partial, ++ &dev_attr_ahci_alpm_slumber, ++ &dev_attr_ahci_alpm_accounting, + &dev_attr_em_buffer, + NULL + }; +@@ -653,9 +674,14 @@ static int ahci_set_lpm(struct ata_link + * Disable interrupts on Phy Ready. This keeps us from + * getting woken up due to spurious phy ready + * interrupts. ++ * ++ * However, when accounting_active is set, we do want ++ * the interrupts for accounting purposes. + */ +- pp->intr_mask &= ~PORT_IRQ_PHYRDY; +- writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ if (!pp->accounting_active) { ++ pp->intr_mask &= ~PORT_IRQ_PHYRDY; ++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ } + + sata_link_scr_lpm(link, policy, false); + } +@@ -1570,6 +1596,162 @@ static void ahci_error_intr(struct ata_p + ata_port_abort(ap); + } + ++static int get_current_alpm_state(struct ata_port *ap) ++{ ++ u32 status = 0; ++ ++ ahci_scr_read(&ap->link, SCR_STATUS, &status); ++ ++ /* link status is in bits 11-8 */ ++ status = status >> 8; ++ status = status & 0x7; ++ ++ if (status == 6) ++ return AHCI_PORT_SLUMBER; ++ if (status == 2) ++ return AHCI_PORT_PARTIAL; ++ if (status == 1) ++ return AHCI_PORT_ACTIVE; ++ return AHCI_PORT_NOLINK; ++} ++ ++static void account_alpm_stats(struct ata_port *ap) ++{ ++ struct ahci_port_priv *pp; ++ ++ int new_state; ++ u64 new_jiffies, jiffies_delta; ++ ++ if (ap == NULL) ++ return; ++ pp = ap->private_data; ++ ++ if (!pp) return; ++ ++ new_state = get_current_alpm_state(ap); ++ new_jiffies = jiffies; ++ ++ jiffies_delta = new_jiffies - pp->previous_jiffies; ++ ++ switch (pp->previous_state) { ++ case AHCI_PORT_NOLINK: ++ pp->active_jiffies = 0; ++ pp->partial_jiffies = 0; ++ pp->slumber_jiffies = 0; ++ break; ++ case AHCI_PORT_ACTIVE: ++ pp->active_jiffies += jiffies_delta; ++ break; ++ case AHCI_PORT_PARTIAL: ++ pp->partial_jiffies += jiffies_delta; ++ break; ++ case AHCI_PORT_SLUMBER: ++ pp->slumber_jiffies += jiffies_delta; ++ break; ++ default: ++ break; ++ } ++ pp->previous_state = new_state; ++ pp->previous_jiffies = new_jiffies; ++} ++ ++static ssize_t ahci_alpm_show_active(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ pp = ap->private_data; ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->active_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_partial(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->partial_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_slumber(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ ++ account_alpm_stats(ap); ++ ++ return sprintf(buf, "%u\n", jiffies_to_msecs(pp->slumber_jiffies)); ++} ++ ++static ssize_t ahci_alpm_show_accounting(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return -EINVAL; ++ ++ pp = ap->private_data; ++ ++ return sprintf(buf, "%u\n", pp->accounting_active); ++} ++ ++static ssize_t ahci_alpm_set_accounting(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ unsigned long flags; ++ struct Scsi_Host *shost = class_to_shost(dev); ++ struct ata_port *ap = ata_shost_to_port(shost); ++ struct ahci_port_priv *pp; ++ void __iomem *port_mmio; ++ ++ if (!ap || ata_port_is_dummy(ap)) ++ return 1; ++ ++ pp = ap->private_data; ++ port_mmio = ahci_port_base(ap); ++ ++ if (!pp) ++ return 1; ++ if (buf[0] == '0') ++ pp->accounting_active = 0; ++ if (buf[0] == '1') ++ pp->accounting_active = 1; ++ ++ /* we need to enable the PHYRDY interrupt when we want accounting */ ++ if (pp->accounting_active) { ++ spin_lock_irqsave(ap->lock, flags); ++ pp->intr_mask |= PORT_IRQ_PHYRDY; ++ writel(pp->intr_mask, port_mmio + PORT_IRQ_MASK); ++ spin_unlock_irqrestore(ap->lock, flags); ++ } ++ return count; ++} ++ ++ + static void ahci_port_intr(struct ata_port *ap) + { + void __iomem *port_mmio = ahci_port_base(ap); +@@ -1590,6 +1772,7 @@ static void ahci_port_intr(struct ata_po + /* if LPM is enabled, PHYRDY doesn't mean anything */ + if (ap->link.lpm_policy > ATA_LPM_MAX_POWER) { + status &= ~PORT_IRQ_PHYRDY; ++ account_alpm_stats(ap); + ahci_scr_write(&ap->link, SCR_ERROR, SERR_PHYRDY_CHG); + } + diff --git a/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch b/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch new file mode 100644 index 0000000..8365f94 --- /dev/null +++ b/patches/linux-2.6.37-rc3-vfs-dirty-inode.patch @@ -0,0 +1,105 @@ +From 880a17e27f0b90d32c0c0b9cf4cd9b4e5d779c73 Mon Sep 17 00:00:00 2001 +From: Arjan van de Ven <arjan@linux.intel.com> +Date: Fri, 26 Nov 2010 12:18:03 -0800 +Subject: [PATCH] vfs: Add a trace point in the mark_inode_dirty function + +PowerTOP would like to be able to show who is keeping the disk +busy by dirtying data. The most logical spot for this is in the vfs +in the mark_inode_dirty() function, doing this on the block level +is not possible because by the time the IO hits the block layer the +guilty party can no longer be found ("kjournald" and "pdflush" are not +useful answers to "who caused this file to be dirty). + +The trace point follows the same logic/style as the block_dump code +and pretty much dumps the same data, just not to dmesg (and thus to +/var/log/messages) but via the trace events streams. + +Eventually we should be able to phase out the block dump code, but that's +for later on after a transition time. + +Signed-of-by: Arjan van de Ven <arjan@linux.intel.com> +--- + fs/fs-writeback.c | 4 ++++ + fs/inode.c | 4 ++++ + include/trace/events/vfs.h | 35 +++++++++++++++++++++++++++++++++++ + 3 files changed, 43 insertions(+), 0 deletions(-) + create mode 100644 include/trace/events/vfs.h + +diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c +index 3d06ccc..6c93517 100644 +--- a/fs/fs-writeback.c ++++ b/fs/fs-writeback.c +@@ -27,6 +27,7 @@ + #include <linux/backing-dev.h> + #include <linux/buffer_head.h> + #include <linux/tracepoint.h> ++#include <trace/events/vfs.h> + #include "internal.h" + + /* +@@ -942,6 +943,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) + sb->s_op->dirty_inode(inode); + } + ++ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)) ++ trace_inode_dirty(inode); ++ + /* + * make sure that changes are seen by all cpus before we test i_state + * -- mikulas +diff --git a/fs/inode.c b/fs/inode.c +index ae2727a..e8b1d42 100644 +--- a/fs/inode.c ++++ b/fs/inode.c +@@ -1708,3 +1708,7 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, + inode->i_mode = mode; + } + EXPORT_SYMBOL(inode_init_owner); ++ ++#define CREATE_TRACE_POINTS ++#include <trace/events/vfs.h> ++ +diff --git a/include/trace/events/vfs.h b/include/trace/events/vfs.h +new file mode 100644 +index 0000000..7e5c135 +--- /dev/null ++++ b/include/trace/events/vfs.h +@@ -0,0 +1,35 @@ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM vfs ++ ++#if !defined(_TRACE_VFS_H) || defined(TRACE_HEADER_MULTI_READ) ++#define _TRACE_VFS_H ++ ++/* ++ * Tracepoint for dirtying an inode: ++ */ ++TRACE_EVENT(inode_dirty, ++ ++ TP_PROTO(struct inode *inode), ++ ++ TP_ARGS(inode), ++ ++ TP_STRUCT__entry( ++ __field( int, dev_major ) ++ __field( int, dev_minor ) ++ __field( ino_t, ino ) ++ ), ++ ++ TP_fast_assign( ++ __entry->dev_major = MAJOR(inode->i_sb->s_dev); ++ __entry->dev_minor = MINOR(inode->i_sb->s_dev); ++ __entry->ino = inode->i_ino; ++ ), ++ ++ TP_printk("dev %d,%d ino %lu ", __entry->dev_major, __entry->dev_minor, ++ (unsigned long) __entry->ino) ++); ++ ++#endif /* _TRACE_VFS_H */ ++ ++/* This part must be outside protection */ ++#include <trace/define_trace.h> +-- +1.7.2.3 + |