From a2daff6803a384ce065e3681a2affea1da59c5f5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 31 May 2011 14:09:00 -0700 Subject: fuse: fix non-ANSI void function notation Fix void function parameter list sparse warning: fs/fuse/inode.c:74:44: warning: non-ANSI function declaration of function 'fuse_alloc_forget' Signed-off-by: Randy Dunlap Signed-off-by: Miklos Szeredi --- fs/fuse/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index cc6ec4b2f0ff..5354906e797c 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -71,7 +71,7 @@ struct fuse_mount_data { unsigned blksize; }; -struct fuse_forget_link *fuse_alloc_forget() +struct fuse_forget_link *fuse_alloc_forget(void) { return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL); } -- cgit v1.2.3 From afe48049ab1d0ca83afe45f9d5116bf4507741eb Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 17 Jun 2011 08:21:10 +0000 Subject: ARM: mach-shmobile: sh7372: Add USB-DMAC support Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 5 +- arch/arm/mach-shmobile/include/mach/sh7372.h | 4 + arch/arm/mach-shmobile/setup-sh7372.c | 146 +++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index c0800d83971e..c239ab10c95b 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -509,7 +509,7 @@ enum { MSTP001, MSTP118, MSTP117, MSTP116, MSTP113, MSTP106, MSTP101, MSTP100, MSTP223, - MSTP218, MSTP217, MSTP216, + MSTP214, MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, @@ -538,6 +538,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP218] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 18, 0), /* DMAC1 */ [MSTP217] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 17, 0), /* DMAC2 */ [MSTP216] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 16, 0), /* DMAC3 */ + [MSTP214] = MSTP(&div4_clks[DIV4_HP], SMSTPCR2, 14, 0), /* USBDMAC */ [MSTP207] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 7, 0), /* SCIFA5 */ [MSTP206] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 6, 0), /* SCIFB */ [MSTP204] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 4, 0), /* SCIFA0 */ @@ -633,6 +634,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-dma-engine.0", &mstp_clks[MSTP218]), /* DMAC1 */ CLKDEV_DEV_ID("sh-dma-engine.1", &mstp_clks[MSTP217]), /* DMAC2 */ CLKDEV_DEV_ID("sh-dma-engine.2", &mstp_clks[MSTP216]), /* DMAC3 */ + CLKDEV_DEV_ID("sh-dma-engine.3", &mstp_clks[MSTP214]), /* USB-DMAC0 */ + CLKDEV_DEV_ID("sh-dma-engine.4", &mstp_clks[MSTP214]), /* USB-DMAC1 */ CLKDEV_DEV_ID("sh-sci.5", &mstp_clks[MSTP207]), /* SCIFA5 */ CLKDEV_DEV_ID("sh-sci.6", &mstp_clks[MSTP206]), /* SCIFB */ CLKDEV_DEV_ID("sh-sci.0", &mstp_clks[MSTP204]), /* SCIFA0 */ diff --git a/arch/arm/mach-shmobile/include/mach/sh7372.h b/arch/arm/mach-shmobile/include/mach/sh7372.h index df20d7670172..51db9d3a2cac 100644 --- a/arch/arm/mach-shmobile/include/mach/sh7372.h +++ b/arch/arm/mach-shmobile/include/mach/sh7372.h @@ -458,6 +458,10 @@ enum { SHDMA_SLAVE_SDHI2_TX, SHDMA_SLAVE_MMCIF_RX, SHDMA_SLAVE_MMCIF_TX, + SHDMA_SLAVE_USB0_TX, + SHDMA_SLAVE_USB0_RX, + SHDMA_SLAVE_USB1_TX, + SHDMA_SLAVE_USB1_RX, }; extern struct clk sh7372_extal1_clk; diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index cd807eea69e2..f2a58d48bfb4 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -602,6 +602,150 @@ static struct platform_device dma2_device = { }, }; +/* + * USB-DMAC + */ + +unsigned int usbts_shift[] = {3, 4, 5}; + +enum { + XMIT_SZ_8BYTE = 0, + XMIT_SZ_16BYTE = 1, + XMIT_SZ_32BYTE = 2, +}; + +#define USBTS_INDEX2VAL(i) (((i) & 3) << 6) + +static const struct sh_dmae_channel sh7372_usb_dmae_channels[] = { + { + .offset = 0, + }, { + .offset = 0x20, + }, +}; + +/* USB DMAC0 */ +static const struct sh_dmae_slave_config sh7372_usb_dmae0_slaves[] = { + { + .slave_id = SHDMA_SLAVE_USB0_TX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, { + .slave_id = SHDMA_SLAVE_USB0_RX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, +}; + +static struct sh_dmae_pdata usb_dma0_platform_data = { + .slave = sh7372_usb_dmae0_slaves, + .slave_num = ARRAY_SIZE(sh7372_usb_dmae0_slaves), + .channel = sh7372_usb_dmae_channels, + .channel_num = ARRAY_SIZE(sh7372_usb_dmae_channels), + .ts_low_shift = 6, + .ts_low_mask = 0xc0, + .ts_high_shift = 0, + .ts_high_mask = 0, + .ts_shift = usbts_shift, + .ts_shift_num = ARRAY_SIZE(usbts_shift), + .dmaor_init = DMAOR_DME, + .chcr_offset = 0x14, + .chcr_ie_bit = 1 << 5, + .dmaor_is_32bit = 1, + .needs_tend_set = 1, + .no_dmars = 1, +}; + +static struct resource sh7372_usb_dmae0_resources[] = { + { + /* Channel registers and DMAOR */ + .start = 0xe68a0020, + .end = 0xe68a0064 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* VCR/SWR/DMICR */ + .start = 0xe68a0000, + .end = 0xe68a0014 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* IRQ for channels */ + .start = evt2irq(0x0a00), + .end = evt2irq(0x0a00), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usb_dma0_device = { + .name = "sh-dma-engine", + .id = 3, + .resource = sh7372_usb_dmae0_resources, + .num_resources = ARRAY_SIZE(sh7372_usb_dmae0_resources), + .dev = { + .platform_data = &usb_dma0_platform_data, + }, +}; + +/* USB DMAC1 */ +static const struct sh_dmae_slave_config sh7372_usb_dmae1_slaves[] = { + { + .slave_id = SHDMA_SLAVE_USB1_TX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, { + .slave_id = SHDMA_SLAVE_USB1_RX, + .chcr = USBTS_INDEX2VAL(XMIT_SZ_8BYTE), + }, +}; + +static struct sh_dmae_pdata usb_dma1_platform_data = { + .slave = sh7372_usb_dmae1_slaves, + .slave_num = ARRAY_SIZE(sh7372_usb_dmae1_slaves), + .channel = sh7372_usb_dmae_channels, + .channel_num = ARRAY_SIZE(sh7372_usb_dmae_channels), + .ts_low_shift = 6, + .ts_low_mask = 0xc0, + .ts_high_shift = 0, + .ts_high_mask = 0, + .ts_shift = usbts_shift, + .ts_shift_num = ARRAY_SIZE(usbts_shift), + .dmaor_init = DMAOR_DME, + .chcr_offset = 0x14, + .chcr_ie_bit = 1 << 5, + .dmaor_is_32bit = 1, + .needs_tend_set = 1, + .no_dmars = 1, +}; + +static struct resource sh7372_usb_dmae1_resources[] = { + { + /* Channel registers and DMAOR */ + .start = 0xe68c0020, + .end = 0xe68c0064 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* VCR/SWR/DMICR */ + .start = 0xe68c0000, + .end = 0xe68c0014 - 1, + .flags = IORESOURCE_MEM, + }, + { + /* IRQ for channels */ + .start = evt2irq(0x1d00), + .end = evt2irq(0x1d00), + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device usb_dma1_device = { + .name = "sh-dma-engine", + .id = 4, + .resource = sh7372_usb_dmae1_resources, + .num_resources = ARRAY_SIZE(sh7372_usb_dmae1_resources), + .dev = { + .platform_data = &usb_dma1_platform_data, + }, +}; + /* VPU */ static struct uio_info vpu_platform_data = { .name = "VPU5HG", @@ -829,6 +973,8 @@ static struct platform_device *sh7372_late_devices[] __initdata = { &dma0_device, &dma1_device, &dma2_device, + &usb_dma0_device, + &usb_dma1_device, &vpu_device, &veu0_device, &veu1_device, -- cgit v1.2.3 From 0ed61fc9da59ea45d56a6928653691cef14bab9b Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Thu, 30 Jun 2011 09:22:50 +0000 Subject: ARM: mach-shmobile: Use CMT2 for timer on sh7372 Switch the sh7372 CPU support to use CMT2 instead of CMT1 for system timer. CMT1 is located in the A3SP power domain while CMT2 is located in the always-on power domain C5. This improves our PM situation - with CMT2 as timer we can power down A3SP and still access the timer. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/clock-sh7372.c | 8 ++++---- arch/arm/mach-shmobile/setup-sh7372.c | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index c0800d83971e..7fb1d2e52540 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -511,8 +511,8 @@ enum { MSTP001, MSTP223, MSTP218, MSTP217, MSTP216, MSTP207, MSTP206, MSTP204, MSTP203, MSTP202, MSTP201, MSTP200, - MSTP329, MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, - MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, + MSTP328, MSTP323, MSTP322, MSTP314, MSTP313, MSTP312, + MSTP423, MSTP415, MSTP413, MSTP411, MSTP410, MSTP406, MSTP403, MSTP400, MSTP_NR }; #define MSTP(_parent, _reg, _bit, _flags) \ @@ -545,7 +545,6 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP202] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 2, 0), /* SCIFA2 */ [MSTP201] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 1, 0), /* SCIFA3 */ [MSTP200] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR2, 0, 0), /* SCIFA4 */ - [MSTP329] = MSTP(&r_clk, SMSTPCR3, 29, 0), /* CMT10 */ [MSTP328] = MSTP(&div6_clks[DIV6_SPU], SMSTPCR3, 28, 0), /* FSI2 */ [MSTP323] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 23, 0), /* IIC1 */ [MSTP322] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR3, 22, 0), /* USB0 */ @@ -559,6 +558,7 @@ static struct clk mstp_clks[MSTP_NR] = { [MSTP410] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 10, 0), /* IIC4 */ [MSTP406] = MSTP(&div6_clks[DIV6_SUB], SMSTPCR4, 6, 0), /* USB1 */ [MSTP403] = MSTP(&r_clk, SMSTPCR4, 3, 0), /* KEYSC */ + [MSTP400] = MSTP(&r_clk, SMSTPCR4, 0, 0), /* CMT2 */ }; #define CLKDEV_CON_ID(_id, _clk) { .con_id = _id, .clk = _clk } @@ -640,7 +640,6 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("sh-sci.2", &mstp_clks[MSTP202]), /* SCIFA2 */ CLKDEV_DEV_ID("sh-sci.3", &mstp_clks[MSTP201]), /* SCIFA3 */ CLKDEV_DEV_ID("sh-sci.4", &mstp_clks[MSTP200]), /* SCIFA4 */ - CLKDEV_DEV_ID("sh_cmt.10", &mstp_clks[MSTP329]), /* CMT10 */ CLKDEV_DEV_ID("sh_fsi2", &mstp_clks[MSTP328]), /* FSI2 */ CLKDEV_DEV_ID("i2c-sh_mobile.1", &mstp_clks[MSTP323]), /* IIC1 */ CLKDEV_DEV_ID("r8a66597_hcd.0", &mstp_clks[MSTP322]), /* USB0 */ @@ -658,6 +657,7 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("r8a66597_udc.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */ + CLKDEV_DEV_ID("sh_cmt.2", &mstp_clks[MSTP400]), /* CMT2 */ CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]), diff --git a/arch/arm/mach-shmobile/setup-sh7372.c b/arch/arm/mach-shmobile/setup-sh7372.c index cd807eea69e2..cb8ac7845245 100644 --- a/arch/arm/mach-shmobile/setup-sh7372.c +++ b/arch/arm/mach-shmobile/setup-sh7372.c @@ -169,35 +169,35 @@ static struct platform_device scif6_device = { }; /* CMT */ -static struct sh_timer_config cmt10_platform_data = { - .name = "CMT10", - .channel_offset = 0x10, - .timer_bit = 0, +static struct sh_timer_config cmt2_platform_data = { + .name = "CMT2", + .channel_offset = 0x40, + .timer_bit = 5, .clockevent_rating = 125, .clocksource_rating = 125, }; -static struct resource cmt10_resources[] = { +static struct resource cmt2_resources[] = { [0] = { - .name = "CMT10", - .start = 0xe6138010, - .end = 0xe613801b, + .name = "CMT2", + .start = 0xe6130040, + .end = 0xe613004b, .flags = IORESOURCE_MEM, }, [1] = { - .start = evt2irq(0x0b00), /* CMT1_CMT10 */ + .start = evt2irq(0x0b80), /* CMT2 */ .flags = IORESOURCE_IRQ, }, }; -static struct platform_device cmt10_device = { +static struct platform_device cmt2_device = { .name = "sh_cmt", - .id = 10, + .id = 2, .dev = { - .platform_data = &cmt10_platform_data, + .platform_data = &cmt2_platform_data, }, - .resource = cmt10_resources, - .num_resources = ARRAY_SIZE(cmt10_resources), + .resource = cmt2_resources, + .num_resources = ARRAY_SIZE(cmt2_resources), }; /* TMU */ @@ -818,7 +818,7 @@ static struct platform_device *sh7372_early_devices[] __initdata = { &scif4_device, &scif5_device, &scif6_device, - &cmt10_device, + &cmt2_device, &tmu00_device, &tmu01_device, }; -- cgit v1.2.3 From b8e513a2ecafb5bb068c00be98d584871afcd4c3 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Sat, 9 Jul 2011 21:11:14 +0000 Subject: ARM: static should be at beginning of declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that the 'static' keywork is at the beginning of declaration for arch/arm/mach-shmobile/board-ap4evb.c This gets rid of warnings like warning: ‘static’ is not at beginning of declaration when building with -Wold-style-declaration (and/or -Wextra which also enables it). Signed-off-by: Jesper Juhl Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ap4evb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 803bc6edfca4..bb4e084920b8 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -443,7 +443,7 @@ static struct platform_device usb1_host_device = { .resource = usb1_host_resources, }; -const static struct fb_videomode ap4evb_lcdc_modes[] = { +static const struct fb_videomode ap4evb_lcdc_modes[] = { { #ifdef CONFIG_AP4EVB_QHD .name = "R63302(QHD)", -- cgit v1.2.3 From 196cfe2ae8fcdc03b3c7d627e7dfe8c0ce7229f9 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Thu, 14 Jul 2011 15:30:22 +0200 Subject: xen-blkfront: Drop name and minor adjustments for emulated scsi devices These were intended to avoid the namespace clash when representing emulated IDE and SCSI devices. However that seems to confuse users more than expected (a disk defined as sda becomes xvde). So for now go back to the scheme which does no adjustments. This will break when mixing IDE and SCSI names in the configuration of guests but should be by now expected. Acked-by: Stefano Stabellini Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index b536a9cef917..238b9419c6d3 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -123,8 +123,8 @@ static DEFINE_SPINLOCK(minor_lock); #define BLKIF_MINOR_EXT(dev) ((dev)&(~EXTENDED)) #define EMULATED_HD_DISK_MINOR_OFFSET (0) #define EMULATED_HD_DISK_NAME_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET / 256) -#define EMULATED_SD_DISK_MINOR_OFFSET (EMULATED_HD_DISK_MINOR_OFFSET + (4 * 16)) -#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_HD_DISK_NAME_OFFSET + 4) +#define EMULATED_SD_DISK_MINOR_OFFSET (0) +#define EMULATED_SD_DISK_NAME_OFFSET (EMULATED_SD_DISK_MINOR_OFFSET / 256) #define DEV_NAME "xvd" /* name in /dev */ -- cgit v1.2.3 From 89153b5cae9f40c224a5d321665a97bf14220c2c Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Thu, 14 Jul 2011 15:30:37 +0200 Subject: xen-blkfront: Fix one off warning about name clash Avoid telling users to use xvde and onwards when using xvde. Acked-by: Stefano Stabellini Signed-off-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 238b9419c6d3..9ea8c2576c70 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -529,7 +529,7 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, minor = BLKIF_MINOR_EXT(info->vdevice); nr_parts = PARTS_PER_EXT_DISK; offset = minor / nr_parts; - if (xen_hvm_domain() && offset <= EMULATED_HD_DISK_NAME_OFFSET + 4) + if (xen_hvm_domain() && offset < EMULATED_HD_DISK_NAME_OFFSET + 4) printk(KERN_WARNING "blkfront: vdevice 0x%x might conflict with " "emulated IDE disks,\n\t choose an xvd device name" "from xvde on\n", info->vdevice); -- cgit v1.2.3 From 3f7e5e2423f6233f7665d54061ba7761ca90cf52 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 13 Jul 2011 07:59:48 +0000 Subject: clocksource: sh_cmt: wait for CMCNT on init V2 Add code to the CMT driver to wait for CMCNT V2. This to let the register value settle before starting the timer channel. Makes the driver more robust. Needed for CMT2 on sh7372 and certain CMT channels on sh73a0. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/clocksource/sh_cmt.c | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c index dc7c033ef587..32a77becc098 100644 --- a/drivers/clocksource/sh_cmt.c +++ b/drivers/clocksource/sh_cmt.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -150,13 +151,13 @@ static void sh_cmt_start_stop_ch(struct sh_cmt_priv *p, int start) static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) { - int ret; + int k, ret; /* enable clock */ ret = clk_enable(p->clk); if (ret) { dev_err(&p->pdev->dev, "cannot enable clock\n"); - return ret; + goto err0; } /* make sure channel is disabled */ @@ -174,9 +175,38 @@ static int sh_cmt_enable(struct sh_cmt_priv *p, unsigned long *rate) sh_cmt_write(p, CMCOR, 0xffffffff); sh_cmt_write(p, CMCNT, 0); + /* + * According to the sh73a0 user's manual, as CMCNT can be operated + * only by the RCLK (Pseudo 32 KHz), there's one restriction on + * modifying CMCNT register; two RCLK cycles are necessary before + * this register is either read or any modification of the value + * it holds is reflected in the LSI's actual operation. + * + * While at it, we're supposed to clear out the CMCNT as of this + * moment, so make sure it's processed properly here. This will + * take RCLKx2 at maximum. + */ + for (k = 0; k < 100; k++) { + if (!sh_cmt_read(p, CMCNT)) + break; + udelay(1); + } + + if (sh_cmt_read(p, CMCNT)) { + dev_err(&p->pdev->dev, "cannot clear CMCNT\n"); + ret = -ETIMEDOUT; + goto err1; + } + /* enable channel */ sh_cmt_start_stop_ch(p, 1); return 0; + err1: + /* stop clock */ + clk_disable(p->clk); + + err0: + return ret; } static void sh_cmt_disable(struct sh_cmt_priv *p) -- cgit v1.2.3 From b4300b72cfc01ea75b8aaede574bdfb04545d691 Mon Sep 17 00:00:00 2001 From: David Engraf Date: Wed, 20 Jul 2011 13:03:39 +0000 Subject: shwdt: fix usage of mod_timer This patch fixes the usage of mod_timer and makes the driver usable. mod_timer must be called with an absolute timeout in jiffies, the old implementation used a relative timeout thus the hardware watchdog was never triggered. Signed-off-by: David Engraf Signed-off-by: Paul Mundt --- drivers/watchdog/shwdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/watchdog/shwdt.c b/drivers/watchdog/shwdt.c index db84f2322d1a..a267dc078daf 100644 --- a/drivers/watchdog/shwdt.c +++ b/drivers/watchdog/shwdt.c @@ -64,7 +64,7 @@ * misses its deadline, the kernel timer will allow the WDT to overflow. */ static int clock_division_ratio = WTCSR_CKS_4096; -#define next_ping_period(cks) msecs_to_jiffies(cks - 4) +#define next_ping_period(cks) (jiffies + msecs_to_jiffies(cks - 4)) static const struct watchdog_info sh_wdt_info; static struct platform_device *sh_wdt_dev; -- cgit v1.2.3 From 9a14a92c939aea1aaf27f5ad37b26b235acc2a65 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Fri, 15 Jul 2011 10:58:55 +0000 Subject: sh: intc: enable both edges GPIO interrupts on sh7372 IRQ-capable GPIOs on sh7372 can be configured to produce interrupts on both edges. Signed-off-by: Guennadi Liakhovetski Acked-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/sh/intc/chip.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/sh/intc/chip.c b/drivers/sh/intc/chip.c index f33e2dd97934..33b2ed451e09 100644 --- a/drivers/sh/intc/chip.c +++ b/drivers/sh/intc/chip.c @@ -186,6 +186,9 @@ static unsigned char intc_irq_sense_table[IRQ_TYPE_SENSE_MASK + 1] = { !defined(CONFIG_CPU_SUBTYPE_SH7709) [IRQ_TYPE_LEVEL_HIGH] = VALID(3), #endif +#if defined(CONFIG_ARCH_SH7372) + [IRQ_TYPE_EDGE_BOTH] = VALID(4), +#endif }; static int intc_set_type(struct irq_data *data, unsigned int type) -- cgit v1.2.3 From 40c5cc263954444f5a76cbf25d408c42da480122 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 24 Jul 2011 22:39:12 +0100 Subject: regmap: Fix bulk reads We should be reading the number of bytes we were asked for, not the size of a single register. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index cf3565cae93d..0eef4da1ac61 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -317,7 +317,7 @@ static int _regmap_raw_read(struct regmap *map, unsigned int reg, void *val, u8[0] |= map->bus->read_flag_mask; ret = map->bus->read(map->dev, map->work_buf, map->format.reg_bytes, - val, map->format.val_bytes); + val, val_len); if (ret != 0) return ret; -- cgit v1.2.3 From c5ad48f3117c4aae379b536f3270fc1efae945c0 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 20 Jun 2011 23:00:12 +0000 Subject: ARM: mach-shmobile: ag5evm: SDHI requires waiting for idle The SDHI block on the ag5evm requires waiting for idle before writing to some registers. Cc: Guennadi Liakhovetski Cc: Magnus Damm Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-ag5evm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-shmobile/board-ag5evm.c b/arch/arm/mach-shmobile/board-ag5evm.c index ce5c2513c6ce..cdfdd624d21d 100644 --- a/arch/arm/mach-shmobile/board-ag5evm.c +++ b/arch/arm/mach-shmobile/board-ag5evm.c @@ -341,6 +341,7 @@ static struct platform_device mipidsi0_device = { static struct sh_mobile_sdhi_info sdhi0_info = { .dma_slave_tx = SHDMA_SLAVE_SDHI0_TX, .dma_slave_rx = SHDMA_SLAVE_SDHI0_RX, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT, .tmio_caps = MMC_CAP_SD_HIGHSPEED, .tmio_ocr_mask = MMC_VDD_27_28 | MMC_VDD_28_29, }; @@ -382,7 +383,7 @@ void ag5evm_sdhi1_set_pwr(struct platform_device *pdev, int state) } static struct sh_mobile_sdhi_info sh_sdhi1_info = { - .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_WRPROTECT_DISABLE | TMIO_MMC_HAS_IDLE_WAIT, .tmio_caps = MMC_CAP_NONREMOVABLE | MMC_CAP_SDIO_IRQ, .tmio_ocr_mask = MMC_VDD_32_33 | MMC_VDD_33_34, .set_pwr = ag5evm_sdhi1_set_pwr, -- cgit v1.2.3 From 1dd75f91ae713049eb6baaa640078f3a6549e522 Mon Sep 17 00:00:00 2001 From: "jhbird.choi@samsung.com" Date: Thu, 21 Jul 2011 15:29:14 +0900 Subject: genirq: Fix wrong bit operation (!msk & 0x01) should be !(msk & 0x01) Signed-off-by: Jonghwan Choi Link: http://lkml.kernel.org/r/1311229754-6003-1-git-send-email-jhbird.choi@samsung.com Signed-off-by: Thomas Gleixner Cc: stable@kernel.org --- kernel/irq/generic-chip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/generic-chip.c b/kernel/irq/generic-chip.c index 3a2cab407b93..e38544dddb18 100644 --- a/kernel/irq/generic-chip.c +++ b/kernel/irq/generic-chip.c @@ -246,7 +246,7 @@ void irq_setup_generic_chip(struct irq_chip_generic *gc, u32 msk, gc->mask_cache = irq_reg_readl(gc->reg_base + ct->regs.mask); for (i = gc->irq_base; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; if (flags & IRQ_GC_INIT_NESTED_LOCK) @@ -301,7 +301,7 @@ void irq_remove_generic_chip(struct irq_chip_generic *gc, u32 msk, raw_spin_unlock(&gc_lock); for (; msk; msk >>= 1, i++) { - if (!msk & 0x01) + if (!(msk & 0x01)) continue; /* Remove handler first. That will mask the irq line */ -- cgit v1.2.3 From 53cc2820acbdbcc768675bfaff321f3a8680a317 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Jul 2011 09:12:50 +0000 Subject: rtc: Handle errors correctly in rtc_irq_set_state() In rtc_irq_set_state, the code checks the correctness of the parameters, but then goes on to unconditionally arms/disarms the hrtimer. Thus a random task might arm/disarm rtc timer and surprise the real owner by either generating events or by stopping them. Cc: stable@kernel.org Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- drivers/rtc/interface.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index df68618f6dbb..b6bf57f25cc9 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -656,6 +656,8 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled err = -EBUSY; if (rtc->irq_task != task) err = -EACCES; + if (err) + goto out; if (enabled) { ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); @@ -664,6 +666,7 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled hrtimer_cancel(&rtc->pie_timer); } rtc->pie_enabled = enabled; +out: spin_unlock_irqrestore(&rtc->irq_task_lock, flags); return err; -- cgit v1.2.3 From 3c8bb90efb6e3105206e4aaa9127395feeda5492 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: rtc: Fix hrtimer deadlock Ben reported a lockup related to rtc. The lockup happens due to: CPU0 CPU1 rtc_irq_set_state() __run_hrtimer() spin_lock_irqsave(&rtc->irq_task_lock) rtc_handle_legacy_irq(); spin_lock(&rtc->irq_task_lock); hrtimer_cancel() while (callback_running); So the running callback never finishes as it's blocked on rtc->irq_task_lock. Use hrtimer_try_to_cancel() instead and drop rtc->irq_task_lock while waiting for the callback. Fix this for both rtc_irq_set_state() and rtc_irq_set_freq(). Cc: stable@kernel.org Reported-by: Ben Greear Signed-off-by: Thomas Gleixner Signed-off-by: John Stultz --- drivers/rtc/interface.c | 56 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index b6bf57f25cc9..a1ba2caa8308 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -636,6 +636,29 @@ void rtc_irq_unregister(struct rtc_device *rtc, struct rtc_task *task) } EXPORT_SYMBOL_GPL(rtc_irq_unregister); +static int rtc_update_hrtimer(struct rtc_device *rtc, int enabled) +{ + /* + * We always cancel the timer here first, because otherwise + * we could run into BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); + * when we manage to start the timer before the callback + * returns HRTIMER_RESTART. + * + * We cannot use hrtimer_cancel() here as a running callback + * could be blocked on rtc->irq_task_lock and hrtimer_cancel() + * would spin forever. + */ + if (hrtimer_try_to_cancel(&rtc->pie_timer) < 0) + return -1; + + if (enabled) { + ktime_t period = ktime_set(0, NSEC_PER_SEC / rtc->irq_freq); + + hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL); + } + return 0; +} + /** * rtc_irq_set_state - enable/disable 2^N Hz periodic IRQs * @rtc: the rtc device @@ -651,24 +674,21 @@ int rtc_irq_set_state(struct rtc_device *rtc, struct rtc_task *task, int enabled int err = 0; unsigned long flags; +retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; if (rtc->irq_task != task) err = -EACCES; - if (err) - goto out; - - if (enabled) { - ktime_t period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); - hrtimer_start(&rtc->pie_timer, period, HRTIMER_MODE_REL); - } else { - hrtimer_cancel(&rtc->pie_timer); + if (!err) { + if (rtc_update_hrtimer(rtc, enabled) < 0) { + spin_unlock_irqrestore(&rtc->irq_task_lock, flags); + cpu_relax(); + goto retry; + } + rtc->pie_enabled = enabled; } - rtc->pie_enabled = enabled; -out: spin_unlock_irqrestore(&rtc->irq_task_lock, flags); - return err; } EXPORT_SYMBOL_GPL(rtc_irq_set_state); @@ -690,20 +710,18 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) if (freq <= 0) return -EINVAL; - +retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); if (rtc->irq_task != NULL && task == NULL) err = -EBUSY; if (rtc->irq_task != task) err = -EACCES; - if (err == 0) { + if (!err) { rtc->irq_freq = freq; - if (rtc->pie_enabled) { - ktime_t period; - hrtimer_cancel(&rtc->pie_timer); - period = ktime_set(0, NSEC_PER_SEC/rtc->irq_freq); - hrtimer_start(&rtc->pie_timer, period, - HRTIMER_MODE_REL); + if (rtc->pie_enabled && rtc_update_hrtimer(rtc, 1) < 0) { + spin_unlock_irqrestore(&rtc->irq_task_lock, flags); + cpu_relax(); + goto retry; } } spin_unlock_irqrestore(&rtc->irq_task_lock, flags); -- cgit v1.2.3 From 6e7a333eaa522ef73be01caec7a01521490aaf00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: rtc: Limit RTC PIE frequency The RTC pie hrtimer is self rearming. We really need to limit the frequency to something sensible. Thus limit it to the 8192Hz max value from the rtc man documentation Cc: Willy Tarreau Cc: stable@kernel.org Signed-off-by: Thomas Gleixner [jstultz: slightly reworked to use RTC_MAX_FREQ value] Signed-off-by: John Stultz --- drivers/rtc/interface.c | 2 +- include/linux/rtc.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index a1ba2caa8308..44e91e598f8d 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) int err = 0; unsigned long flags; - if (freq <= 0) + if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea25660..93f4d035076b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -97,6 +97,9 @@ struct rtc_pll_info { #define RTC_AF 0x20 /* Alarm interrupt */ #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + +#define RTC_MAX_FREQ 8192 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From fd079facb3fdd1b0517f0b2087ac05c30ea09cfe Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 25 Jul 2011 11:01:09 -0700 Subject: KVM: fix TASK_DELAY_ACCT kconfig warning Fix kconfig dependency warning: warning: (KVM) selects TASK_DELAY_ACCT which has unmet direct dependencies (TASKSTATS) Signed-off-by: Randy Dunlap Signed-off-by: Avi Kivity --- arch/x86/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 988724b236b6..0a09b58bb1cb 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -31,6 +31,7 @@ config KVM select KVM_ASYNC_PF select USER_RETURN_NOTIFIER select KVM_MMIO + select TASKSTATS select TASK_DELAY_ACCT ---help--- Support hosting fully virtualized guest machines using hardware -- cgit v1.2.3 From f3637a5f2e2eb391ff5757bc83fb5de8f9726464 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 7 Jul 2011 22:32:17 +0200 Subject: irq: Always set IRQF_ONESHOT if no primary handler is specified If no primary handler is specified then a default one is assigned which always returns IRQ_WAKE_THREAD. This handler requires the IRQF_ONESHOT flag on LEVEL / EIO typed irqs because the source of interrupt is not disabled. Since it is required for those users and there is no difference for others it makes sense to add this flag unconditionally. Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/1310070737-18514-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/irq/manage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 0a7840aeb0fb..3f9cd4799da7 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1322,6 +1322,7 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (!thread_fn) return -EINVAL; handler = irq_default_primary_handler; + irqflags |= IRQF_ONESHOT; } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); -- cgit v1.2.3 From b6873807a7143b7d6d8b06809295e559d07d7deb Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 11 Jul 2011 12:17:31 +0200 Subject: irq: Track the owner of irq descriptor Interrupt descriptors can be allocated from modules. The interrupts are used by other modules, but we have no refcount on the module which provides the interrupts and there is no way to establish one on the device level as the interrupt using module is agnostic to the fact that the interrupt is provided by a module rather than by some builtin interrupt controller. To prevent removal of the interrupt providing module, we can track the owner of the interrupt descriptor, which also provides the relevant irq chip functions in the irq descriptor. request/setup_irq() can now acquire a refcount on the owner module to prevent unloading. free_irq() drops the refcount. Signed-off-by: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/20110711101731.GA13804@Chamillionaire.breakpoint.cc Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 11 ++++++++++- include/linux/irqdesc.h | 1 + kernel/irq/irqdesc.c | 36 ++++++++++++++++++++++++------------ kernel/irq/manage.c | 17 +++++++++++++---- 4 files changed, 48 insertions(+), 17 deletions(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index baa397eb9c33..16d6f54ef1dd 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -546,7 +547,15 @@ static inline struct msi_desc *irq_data_get_msi(struct irq_data *d) return d->msi_desc; } -int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node); +int __irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner); + +static inline int irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, + int node) +{ + return __irq_alloc_descs(irq, from, cnt, node, THIS_MODULE); +} + void irq_free_descs(unsigned int irq, unsigned int cnt); int irq_reserve_irqs(unsigned int from, unsigned int cnt); diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 2d921b35212c..150134ac709a 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -66,6 +66,7 @@ struct irq_desc { #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif + struct module *owner; const char *name; } ____cacheline_internodealigned_in_smp; diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 4c60a50e66b2..cb65d0360e31 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -70,7 +70,8 @@ static inline void desc_smp_init(struct irq_desc *desc, int node) { } static inline int desc_node(struct irq_desc *desc) { return 0; } #endif -static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) +static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node, + struct module *owner) { int cpu; @@ -86,6 +87,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node) desc->irq_count = 0; desc->irqs_unhandled = 0; desc->name = NULL; + desc->owner = owner; for_each_possible_cpu(cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; desc_smp_init(desc, node); @@ -128,7 +130,7 @@ static void free_masks(struct irq_desc *desc) static inline void free_masks(struct irq_desc *desc) { } #endif -static struct irq_desc *alloc_desc(int irq, int node) +static struct irq_desc *alloc_desc(int irq, int node, struct module *owner) { struct irq_desc *desc; gfp_t gfp = GFP_KERNEL; @@ -147,7 +149,7 @@ static struct irq_desc *alloc_desc(int irq, int node) raw_spin_lock_init(&desc->lock); lockdep_set_class(&desc->lock, &irq_desc_lock_class); - desc_set_defaults(irq, desc, node); + desc_set_defaults(irq, desc, node, owner); return desc; @@ -173,13 +175,14 @@ static void free_desc(unsigned int irq) kfree(desc); } -static int alloc_descs(unsigned int start, unsigned int cnt, int node) +static int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { struct irq_desc *desc; int i; for (i = 0; i < cnt; i++) { - desc = alloc_desc(start + i, node); + desc = alloc_desc(start + i, node, owner); if (!desc) goto err; mutex_lock(&sparse_irq_lock); @@ -227,7 +230,7 @@ int __init early_irq_init(void) nr_irqs = initcnt; for (i = 0; i < initcnt; i++) { - desc = alloc_desc(i, node); + desc = alloc_desc(i, node, NULL); set_bit(i, allocated_irqs); irq_insert_desc(i, desc); } @@ -261,7 +264,7 @@ int __init early_irq_init(void) alloc_masks(&desc[i], GFP_KERNEL, node); raw_spin_lock_init(&desc[i].lock); lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - desc_set_defaults(i, &desc[i], node); + desc_set_defaults(i, &desc[i], node, NULL); } return arch_early_irq_init(); } @@ -276,8 +279,16 @@ static void free_desc(unsigned int irq) dynamic_irq_cleanup(irq); } -static inline int alloc_descs(unsigned int start, unsigned int cnt, int node) +static inline int alloc_descs(unsigned int start, unsigned int cnt, int node, + struct module *owner) { + u32 i; + + for (i = 0; i < cnt; i++) { + struct irq_desc *desc = irq_to_desc(start + i); + + desc->owner = owner; + } return start; } @@ -337,7 +348,8 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * Returns the first irq number or error code */ int __ref -irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) +__irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node, + struct module *owner) { int start, ret; @@ -366,13 +378,13 @@ irq_alloc_descs(int irq, unsigned int from, unsigned int cnt, int node) bitmap_set(allocated_irqs, start, cnt); mutex_unlock(&sparse_irq_lock); - return alloc_descs(start, cnt, node); + return alloc_descs(start, cnt, node, owner); err: mutex_unlock(&sparse_irq_lock); return ret; } -EXPORT_SYMBOL_GPL(irq_alloc_descs); +EXPORT_SYMBOL_GPL(__irq_alloc_descs); /** * irq_reserve_irqs - mark irqs allocated @@ -440,7 +452,7 @@ void dynamic_irq_cleanup(unsigned int irq) unsigned long flags; raw_spin_lock_irqsave(&desc->lock, flags); - desc_set_defaults(irq, desc, desc_node(desc)); + desc_set_defaults(irq, desc, desc_node(desc), NULL); raw_spin_unlock_irqrestore(&desc->lock, flags); } diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 3f9cd4799da7..2e9425889fa8 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -883,6 +883,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) if (desc->irq_data.chip == &no_irq_chip) return -ENOSYS; + if (!try_module_get(desc->owner)) + return -ENODEV; /* * Some drivers like serial.c use request_irq() heavily, * so we have to be careful not to interfere with a @@ -906,8 +908,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) */ nested = irq_settings_is_nested_thread(desc); if (nested) { - if (!new->thread_fn) - return -EINVAL; + if (!new->thread_fn) { + ret = -EINVAL; + goto out_mput; + } /* * Replace the primary handler which was provided from * the driver for non nested interrupt handling by the @@ -929,8 +933,10 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) t = kthread_create(irq_thread, new, "irq/%d-%s", irq, new->name); - if (IS_ERR(t)) - return PTR_ERR(t); + if (IS_ERR(t)) { + ret = PTR_ERR(t); + goto out_mput; + } /* * We keep the reference to the task struct even if * the thread dies to avoid that the interrupt code @@ -1095,6 +1101,8 @@ out_thread: kthread_stop(t); put_task_struct(t); } +out_mput: + module_put(desc->owner); return ret; } @@ -1203,6 +1211,7 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) put_task_struct(action->thread); } + module_put(desc->owner); return action; } -- cgit v1.2.3 From 00fe1ae91e0d69e52e8212d23cd3ecc74a7259a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Jul 2011 16:24:46 +0200 Subject: netfilter: xt_rateest: fix xt_rateest_mt_checkentry() commit 4a5a5c73b7cfee (slightly better error reporting) added some useless code in xt_rateest_mt_checkentry(). Fix this so that different error codes can really be returned. Signed-off-by: Eric Dumazet CC: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_rateest.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index 76a083184d8e..ed0db15ab00e 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -78,7 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; - int ret = false; + int ret = -EINVAL; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | XT_RATEEST_MATCH_REL)) != 1) @@ -101,13 +101,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) if (!est1) goto err1; + est2 = NULL; if (info->flags & XT_RATEEST_MATCH_REL) { est2 = xt_rateest_lookup(info->name2); if (!est2) goto err2; - } else - est2 = NULL; - + } info->est1 = est1; info->est2 = est2; @@ -116,7 +115,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) err2: xt_rateest_put(est1); err1: - return -EINVAL; + return ret; } static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) -- cgit v1.2.3 From 91c66c6893a3e2bb8a88a30cb76007d5d49d32c9 Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Fri, 29 Jul 2011 16:38:49 +0200 Subject: netfilter: ip_queue: Fix small leak in ipq_build_packet_message() ipq_build_packet_message() in net/ipv4/netfilter/ip_queue.c and net/ipv6/netfilter/ip6_queue.c contain a small potential mem leak as far as I can tell. We allocate memory for 'skb' with alloc_skb() annd then call nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); NLMSG_PUT is a macro NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) that expands to NLMSG_NEW, which is also a macro which expands to: NLMSG_NEW(skb, pid, seq, type, len, flags) \ ({ if (unlikely(skb_tailroom(skb) < (int)NLMSG_SPACE(len))) \ goto nlmsg_failure; \ __nlmsg_put(skb, pid, seq, type, len, flags); }) If we take the true branch of the 'if' statement and 'goto nlmsg_failure', then we'll, at that point, return from ipq_build_packet_message() without having assigned 'skb' to anything and we'll leak the memory we allocated for it when it goes out of scope. Fix this by placing a 'kfree(skb)' at 'nlmsg_failure'. I admit that I do not know how likely this to actually happen or even if there's something that guarantees that it will never happen - I'm not that familiar with this code, but if that is so, I've not been able to spot it. Signed-off-by: Jesper Juhl Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 1 + net/ipv6/netfilter/ip6_queue.c | 1 + 2 files changed, 2 insertions(+) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 5c9b9d963918..48f7d5b4ff37 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 249394863284..87b243a25afa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -218,6 +218,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: + kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; -- cgit v1.2.3 From 9823d9ff483af4ce8804a9eb69600ca739cd1f58 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Fri, 29 Jul 2011 16:40:30 +0200 Subject: netfilter: ebtables: fix ebtables build dependency MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The configuration of ebtables shouldn't depend on CONFIG_BRIDGE_NETFILTER, only on CONFIG_NETFILTER. Reported-by: Sbastien Laveze Signed-off-by: Bart De Schuymer Signed-off-by: Patrick McHardy --- net/bridge/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index ba6f73eb06c6..a9aff9c7d027 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -4,7 +4,7 @@ menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" - depends on BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && NETFILTER select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification -- cgit v1.2.3 From aa387cc895672b00f807ad7c734a2defaf677712 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 31 Jul 2011 22:05:09 +0200 Subject: block: add bsg helper library This moves the FC classes bsg code to the block layer and makes it a lib so that other classes like iscsi and SAS can use it. It is helpful because working with the request queue, bios, creating scatterlists, etc are a pain that the LLD does not have to worry about with normal IOs and should not have to worry about for bsg requests. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- block/Kconfig | 10 ++ block/Makefile | 1 + block/bsg-lib.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 + include/linux/bsg-lib.h | 73 ++++++++++++ 5 files changed, 385 insertions(+) create mode 100644 block/bsg-lib.c create mode 100644 include/linux/bsg-lib.h diff --git a/block/Kconfig b/block/Kconfig index 60be1e0455da..e97934eececa 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -65,6 +65,16 @@ config BLK_DEV_BSG If unsure, say Y. +config BLK_DEV_BSGLIB + bool "Block layer SG support v4 helper lib" + default n + select BLK_DEV_BSG + help + Subsystems will normally enable this if needed. Users will not + normally need to manually enable this. + + If unsure, say N. + config BLK_DEV_INTEGRITY bool "Block layer data integrity support" ---help--- diff --git a/block/Makefile b/block/Makefile index 0fec4b3fab51..514c6e4f427a 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 000000000000..f8c0a61a529c --- /dev/null +++ b/block/bsg-lib.c @@ -0,0 +1,297 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include +#include +#include +#include +#include +#include + +/** + * bsg_destroy_job - routine to teardown/delete a bsg job + * @job: bsg_job that is to be torn down + */ +static void bsg_destroy_job(struct bsg_job *job) +{ + put_device(job->dev); /* release reference for the request */ + + kfree(job->request_payload.sg_list); + kfree(job->reply_payload.sg_list); + kfree(job); +} + +/** + * bsg_job_done - completion routine for bsg requests + * @job: bsg_job that is complete + * @result: job reply result + * @reply_payload_rcv_len: length of payload recvd + * + * The LLD should call this when the bsg job has completed. + */ +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len) +{ + struct request *req = job->req; + struct request *rsp = req->next_rq; + int err; + + err = job->req->errors = result; + if (err < 0) + /* we're only returning the result field in the reply */ + job->req->sense_len = sizeof(u32); + else + job->req->sense_len = job->reply_len; + /* we assume all request payload was transferred, residual == 0 */ + req->resid_len = 0; + + if (rsp) { + WARN_ON(reply_payload_rcv_len > rsp->resid_len); + + /* set reply (bidi) residual */ + rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + } + blk_complete_request(req); +} +EXPORT_SYMBOL_GPL(bsg_job_done); + +/** + * bsg_softirq_done - softirq done routine for destroying the bsg requests + * @rq: BSG request that holds the job to be destroyed + */ +static void bsg_softirq_done(struct request *rq) +{ + struct bsg_job *job = rq->special; + + blk_end_request_all(rq, rq->errors); + bsg_destroy_job(job); +} + +static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) +{ + size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); + + BUG_ON(!req->nr_phys_segments); + + buf->sg_list = kzalloc(sz, GFP_KERNEL); + if (!buf->sg_list) + return -ENOMEM; + sg_init_table(buf->sg_list, req->nr_phys_segments); + buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); + buf->payload_len = blk_rq_bytes(req); + return 0; +} + +/** + * bsg_create_job - create the bsg_job structure for the bsg request + * @dev: device that is being sent the bsg request + * @req: BSG request that needs a job structure + */ +static int bsg_create_job(struct device *dev, struct request *req) +{ + struct request *rsp = req->next_rq; + struct request_queue *q = req->q; + struct bsg_job *job; + int ret; + + BUG_ON(req->special); + + job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); + if (!job) + return -ENOMEM; + + req->special = job; + job->req = req; + if (q->bsg_job_size) + job->dd_data = (void *)&job[1]; + job->request = req->cmd; + job->request_len = req->cmd_len; + job->reply = req->sense; + job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer + * allocated */ + if (req->bio) { + ret = bsg_map_buffer(&job->request_payload, req); + if (ret) + goto failjob_rls_job; + } + if (rsp && rsp->bio) { + ret = bsg_map_buffer(&job->reply_payload, rsp); + if (ret) + goto failjob_rls_rqst_payload; + } + job->dev = dev; + /* take a reference for the request */ + get_device(job->dev); + return 0; + +failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); +failjob_rls_job: + kfree(job); + return -ENOMEM; +} + +/* + * bsg_goose_queue - restart queue in case it was stopped + * @q: request q to be restarted + */ +void bsg_goose_queue(struct request_queue *q) +{ + if (!q) + return; + + blk_run_queue_async(q); +} +EXPORT_SYMBOL_GPL(bsg_goose_queue); + +/** + * bsg_request_fn - generic handler for bsg requests + * @q: request queue to manage + * + * On error the create_bsg_job function should return a -Exyz error value + * that will be set to the req->errors. + * + * Drivers/subsys should pass this to the queue init function. + */ +void bsg_request_fn(struct request_queue *q) +{ + struct device *dev = q->queuedata; + struct request *req; + struct bsg_job *job; + int ret; + + if (!get_device(dev)) + return; + + while (1) { + req = blk_fetch_request(q); + if (!req) + break; + spin_unlock_irq(q->queue_lock); + + ret = bsg_create_job(dev, req); + if (ret) { + req->errors = ret; + blk_end_request_all(req, ret); + spin_lock_irq(q->queue_lock); + continue; + } + + job = req->special; + ret = q->bsg_job_fn(job); + spin_lock_irq(q->queue_lock); + if (ret) + break; + } + + spin_unlock_irq(q->queue_lock); + put_device(dev); + spin_lock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(bsg_request_fn); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @q: request queue setup by caller + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * + * The caller should have setup the reuqest queue with bsg_request_fn + * as the request_fn. + */ +int bsg_setup_queue(struct device *dev, struct request_queue *q, + char *name, bsg_job_fn *job_fn, int dd_job_size) +{ + int ret; + + q->queuedata = dev; + q->bsg_job_size = dd_job_size; + q->bsg_job_fn = job_fn; + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + blk_queue_softirq_done(q, bsg_softirq_done); + blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + + ret = bsg_register_queue(q, dev, name, NULL); + if (ret) { + printk(KERN_ERR "%s: bsg interface failed to " + "initialize - register queue\n", dev->kobj.name); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_remove_queue - Deletes the bsg dev from the q + * @q: the request_queue that is to be torn down. + * + * Notes: + * Before unregistering the queue empty any requests that are blocked + */ +void bsg_remove_queue(struct request_queue *q) +{ + struct request *req; /* block request */ + int counts; /* totals for request_list count and starved */ + + if (!q) + return; + + /* Stop taking in new requests */ + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + + /* drain all requests in the queue */ + while (1) { + /* need the lock to fetch a request + * this may fetch the same reqeust as the previous pass + */ + req = blk_fetch_request(q); + /* save requests in use and starved */ + counts = q->rq.count[0] + q->rq.count[1] + + q->rq.starved[0] + q->rq.starved[1]; + spin_unlock_irq(q->queue_lock); + /* any requests still outstanding? */ + if (counts == 0) + break; + + /* This may be the same req as the previous iteration, + * always send the blk_end_request_all after a prefetch. + * It is not okay to not end the request because the + * prefetch started the request. + */ + if (req) { + /* return -ENXIO to indicate that this queue is + * going away + */ + req->errors = -ENXIO; + blk_end_request_all(req, -ENXIO); + } + + msleep(200); /* allow bsg to possibly finish */ + spin_lock_irq(q->queue_lock); + } + bsg_unregister_queue(q); +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e67c45b3bc9..847928546076 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -30,6 +30,7 @@ struct request_pm_state; struct blk_trace; struct request; struct sg_io_hdr; +struct bsg_job; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -209,6 +210,7 @@ typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); +typedef int (bsg_job_fn) (struct bsg_job *); enum blk_eh_timer_return { BLK_EH_NOT_HANDLED, @@ -375,6 +377,8 @@ struct request_queue { struct mutex sysfs_lock; #if defined(CONFIG_BLK_DEV_BSG) + bsg_job_fn *bsg_job_fn; + int bsg_job_size; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h new file mode 100644 index 000000000000..f55ab8cdc106 --- /dev/null +++ b/include/linux/bsg-lib.h @@ -0,0 +1,73 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#ifndef _BLK_BSG_ +#define _BLK_BSG_ + +#include + +struct request; +struct device; +struct scatterlist; +struct request_queue; + +struct bsg_buffer { + unsigned int payload_len; + int sg_cnt; + struct scatterlist *sg_list; +}; + +struct bsg_job { + struct device *dev; + struct request *req; + + /* Transport/driver specific request/reply structs */ + void *request; + void *reply; + + unsigned int request_len; + unsigned int reply_len; + /* + * On entry : reply_len indicates the buffer size allocated for + * the reply. + * + * Upon completion : the message handler must set reply_len + * to indicates the size of the reply to be returned to the + * caller. + */ + + /* DMA payloads for the request/response */ + struct bsg_buffer request_payload; + struct bsg_buffer reply_payload; + + void *dd_data; /* Used for driver-specific storage */ +}; + +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len); +int bsg_setup_queue(struct device *dev, struct request_queue *q, char *name, + bsg_job_fn *job_fn, int dd_job_size); +void bsg_request_fn(struct request_queue *q); +void bsg_remove_queue(struct request_queue *q); +void bsg_goose_queue(struct request_queue *q); + +#endif -- cgit v1.2.3 From 34dd82afd27da2537199d7f71f1542501c6f96e7 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: replace linked list of allocated devices with an idr index Replace the linked list, that keeps track of allocated devices, with an idr index to allow a more efficient lookup of devices. Cc: Tejun Heo Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 152 +++++++++++++++++++++++++++------------------------ include/linux/loop.h | 1 - 2 files changed, 80 insertions(+), 73 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da78212b..f58532e77777 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -78,8 +78,8 @@ #include -static LIST_HEAD(loop_devices); -static DEFINE_MUTEX(loop_devices_mutex); +static DEFINE_IDR(loop_index_idr); +static DEFINE_MUTEX(loop_index_mutex); static int max_part; static int part_shift; @@ -722,17 +722,10 @@ static inline int is_loop_device(struct file *file) static ssize_t loop_attr_show(struct device *dev, char *page, ssize_t (*callback)(struct loop_device *, char *)) { - struct loop_device *l, *lo = NULL; - - mutex_lock(&loop_devices_mutex); - list_for_each_entry(l, &loop_devices, lo_list) - if (disk_to_dev(l->lo_disk) == dev) { - lo = l; - break; - } - mutex_unlock(&loop_devices_mutex); + struct gendisk *disk = dev_to_disk(dev); + struct loop_device *lo = disk->private_data; - return lo ? callback(lo, page) : -EIO; + return callback(lo, page); } #define LOOP_ATTR_RO(_name) \ @@ -1557,40 +1550,64 @@ int loop_register_transfer(struct loop_func_table *funcs) return 0; } +static int unregister_transfer_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_func_table *xfer = data; + + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_encryption == xfer) + loop_release_xfer(lo); + mutex_unlock(&lo->lo_ctl_mutex); + return 0; +} + int loop_unregister_transfer(int number) { unsigned int n = number; - struct loop_device *lo; struct loop_func_table *xfer; if (n == 0 || n >= MAX_LO_CRYPT || (xfer = xfer_funcs[n]) == NULL) return -EINVAL; xfer_funcs[n] = NULL; - - list_for_each_entry(lo, &loop_devices, lo_list) { - mutex_lock(&lo->lo_ctl_mutex); - - if (lo->lo_encryption == xfer) - loop_release_xfer(lo); - - mutex_unlock(&lo->lo_ctl_mutex); - } - + idr_for_each(&loop_index_idr, &unregister_transfer_cb, xfer); return 0; } EXPORT_SYMBOL(loop_register_transfer); EXPORT_SYMBOL(loop_unregister_transfer); -static struct loop_device *loop_alloc(int i) +static int loop_add(struct loop_device **l, int i) { struct loop_device *lo; struct gendisk *disk; + int err; lo = kzalloc(sizeof(*lo), GFP_KERNEL); - if (!lo) + if (!lo) { + err = -ENOMEM; goto out; + } + + err = idr_pre_get(&loop_index_idr, GFP_KERNEL); + if (err < 0) + goto out_free_dev; + + if (i >= 0) { + int m; + + /* create specific i in the index */ + err = idr_get_new_above(&loop_index_idr, lo, i, &m); + if (err >= 0 && i != m) { + idr_remove(&loop_index_idr, m); + err = -EEXIST; + } + } else { + err = -EINVAL; + } + if (err < 0) + goto out_free_dev; lo->lo_queue = blk_alloc_queue(GFP_KERNEL); if (!lo->lo_queue) @@ -1611,56 +1628,54 @@ static struct loop_device *loop_alloc(int i) disk->private_data = lo; disk->queue = lo->lo_queue; sprintf(disk->disk_name, "loop%d", i); - return lo; + add_disk(disk); + *l = lo; + return lo->lo_number; out_free_queue: blk_cleanup_queue(lo->lo_queue); out_free_dev: kfree(lo); out: - return NULL; + return err; } -static void loop_free(struct loop_device *lo) +static void loop_remove(struct loop_device *lo) { + del_gendisk(lo->lo_disk); blk_cleanup_queue(lo->lo_queue); put_disk(lo->lo_disk); - list_del(&lo->lo_list); kfree(lo); } -static struct loop_device *loop_init_one(int i) +static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; + int ret = -ENODEV; - list_for_each_entry(lo, &loop_devices, lo_list) { - if (lo->lo_number == i) - return lo; - } - - lo = loop_alloc(i); + lo = idr_find(&loop_index_idr, i); if (lo) { - add_disk(lo->lo_disk); - list_add_tail(&lo->lo_list, &loop_devices); + *l = lo; + ret = lo->lo_number; } - return lo; -} - -static void loop_del_one(struct loop_device *lo) -{ - del_gendisk(lo->lo_disk); - loop_free(lo); + return ret; } static struct kobject *loop_probe(dev_t dev, int *part, void *data) { struct loop_device *lo; struct kobject *kobj; + int err; - mutex_lock(&loop_devices_mutex); - lo = loop_init_one(MINOR(dev) >> part_shift); - kobj = lo ? get_disk(lo->lo_disk) : ERR_PTR(-ENOMEM); - mutex_unlock(&loop_devices_mutex); + mutex_lock(&loop_index_mutex); + err = loop_lookup(&lo, MINOR(dev) >> part_shift); + if (err < 0) + err = loop_add(&lo, MINOR(dev) >> part_shift); + if (err < 0) + kobj = ERR_PTR(err); + else + kobj = get_disk(lo->lo_disk); + mutex_unlock(&loop_index_mutex); *part = 0; return kobj; @@ -1670,7 +1685,7 @@ static int __init loop_init(void) { int i, nr; unsigned long range; - struct loop_device *lo, *next; + struct loop_device *lo; /* * loop module now has a feature to instantiate underlying device @@ -1719,43 +1734,36 @@ static int __init loop_init(void) if (register_blkdev(LOOP_MAJOR, "loop")) return -EIO; - for (i = 0; i < nr; i++) { - lo = loop_alloc(i); - if (!lo) - goto Enomem; - list_add_tail(&lo->lo_list, &loop_devices); - } - - /* point of no return */ - - list_for_each_entry(lo, &loop_devices, lo_list) - add_disk(lo->lo_disk); - blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); + /* pre-create number devices of devices given by config or max_loop */ + mutex_lock(&loop_index_mutex); + for (i = 0; i < nr; i++) + loop_add(&lo, i); + mutex_unlock(&loop_index_mutex); + printk(KERN_INFO "loop: module loaded\n"); return 0; +} -Enomem: - printk(KERN_INFO "loop: out of memory\n"); - - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_free(lo); +static int loop_exit_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; - unregister_blkdev(LOOP_MAJOR, "loop"); - return -ENOMEM; + loop_remove(lo); + return 0; } static void __exit loop_exit(void) { unsigned long range; - struct loop_device *lo, *next; range = max_loop ? max_loop << part_shift : 1UL << MINORBITS; - list_for_each_entry_safe(lo, next, &loop_devices, lo_list) - loop_del_one(lo); + idr_for_each(&loop_index_idr, &loop_exit_cb, NULL); + idr_remove_all(&loop_index_idr); + idr_destroy(&loop_index_idr); blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); diff --git a/include/linux/loop.h b/include/linux/loop.h index 66c194e2d9b9..5f08d18fa148 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -64,7 +64,6 @@ struct loop_device { struct request_queue *lo_queue; struct gendisk *lo_disk; - struct list_head lo_list; }; #endif /* __KERNEL__ */ -- cgit v1.2.3 From 770fe30a46a12b6fb6b63fbe1737654d28e84844 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: add management interface for on-demand device allocation Loop devices today have a fixed pre-allocated number of usually 8. The number can only be changed at module init time. To find a free device to use, /dev/loop%i needs to be scanned, and all devices need to be opened until a free one is possibly found. This adds a new /dev/loop-control device node, that allows to dynamically find or allocate a free device, and to add and remove loop devices from the running system: LOOP_CTL_ADD adds a specific device. Arg is the number of the device. It returns the device i or a negative error code. LOOP_CTL_REMOVE removes a specific device, Arg is the number the device. It returns the device i or a negative error code. LOOP_CTL_GET_FREE finds the next unbound device or allocates a new one. No arg is given. It returns the device i or a negative error code. The loop kernel module gets automatically loaded when /dev/loop-control is accessed the first time. The alias specified in the module, instructs udev to create this 'dead' device node, even when the module is not loaded. Example: cfd = open("/dev/loop-control", O_RDWR); # add a new specific loop device err = ioctl(cfd, LOOP_CTL_ADD, devnr); # remove a specific loop device err = ioctl(cfd, LOOP_CTL_REMOVE, devnr); # find or allocate a free loop device to use devnr = ioctl(cfd, LOOP_CTL_GET_FREE); sprintf(loopname, "/dev/loop%i", devnr); ffd = open("backing-file", O_RDWR); lfd = open(loopname, O_RDWR); err = ioctl(lfd, LOOP_SET_FD, ffd); Cc: Tejun Heo Cc: Karel Zak Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 120 +++++++++++++++++++++++++++++++++++++++++++-- include/linux/loop.h | 4 ++ include/linux/miscdevice.h | 1 + 3 files changed, 121 insertions(+), 4 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f58532e77777..5c9edf944879 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -75,7 +75,7 @@ #include #include #include - +#include #include static DEFINE_IDR(loop_index_idr); @@ -1478,13 +1478,22 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, static int lo_open(struct block_device *bdev, fmode_t mode) { - struct loop_device *lo = bdev->bd_disk->private_data; + struct loop_device *lo; + int err = 0; + + mutex_lock(&loop_index_mutex); + lo = bdev->bd_disk->private_data; + if (!lo) { + err = -ENXIO; + goto out; + } mutex_lock(&lo->lo_ctl_mutex); lo->lo_refcnt++; mutex_unlock(&lo->lo_ctl_mutex); - - return 0; +out: + mutex_unlock(&loop_index_mutex); + return err; } static int lo_release(struct gendisk *disk, fmode_t mode) @@ -1603,6 +1612,13 @@ static int loop_add(struct loop_device **l, int i) idr_remove(&loop_index_idr, m); err = -EEXIST; } + } else if (i == -1) { + int m; + + /* get next free nr */ + err = idr_get_new(&loop_index_idr, lo, &m); + if (err >= 0) + i = m; } else { err = -EINVAL; } @@ -1648,16 +1664,41 @@ static void loop_remove(struct loop_device *lo) kfree(lo); } +static int find_free_cb(int id, void *ptr, void *data) +{ + struct loop_device *lo = ptr; + struct loop_device **l = data; + + if (lo->lo_state == Lo_unbound) { + *l = lo; + return 1; + } + return 0; +} + static int loop_lookup(struct loop_device **l, int i) { struct loop_device *lo; int ret = -ENODEV; + if (i < 0) { + int err; + + err = idr_for_each(&loop_index_idr, &find_free_cb, &lo); + if (err == 1) { + *l = lo; + ret = lo->lo_number; + } + goto out; + } + + /* lookup and return a specific i */ lo = idr_find(&loop_index_idr, i); if (lo) { *l = lo; ret = lo->lo_number; } +out: return ret; } @@ -1681,11 +1722,76 @@ static struct kobject *loop_probe(dev_t dev, int *part, void *data) return kobj; } +static long loop_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct loop_device *lo; + int ret = -ENOSYS; + + mutex_lock(&loop_index_mutex); + switch (cmd) { + case LOOP_CTL_ADD: + ret = loop_lookup(&lo, parm); + if (ret >= 0) { + ret = -EEXIST; + break; + } + ret = loop_add(&lo, parm); + break; + case LOOP_CTL_REMOVE: + ret = loop_lookup(&lo, parm); + if (ret < 0) + break; + mutex_lock(&lo->lo_ctl_mutex); + if (lo->lo_state != Lo_unbound) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + if (lo->lo_refcnt > 0) { + ret = -EBUSY; + mutex_unlock(&lo->lo_ctl_mutex); + break; + } + lo->lo_disk->private_data = NULL; + mutex_unlock(&lo->lo_ctl_mutex); + idr_remove(&loop_index_idr, lo->lo_number); + loop_remove(lo); + break; + case LOOP_CTL_GET_FREE: + ret = loop_lookup(&lo, -1); + if (ret >= 0) + break; + ret = loop_add(&lo, -1); + } + mutex_unlock(&loop_index_mutex); + + return ret; +} + +static const struct file_operations loop_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = loop_control_ioctl, + .compat_ioctl = loop_control_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, +}; + +static struct miscdevice loop_misc = { + .minor = LOOP_CTRL_MINOR, + .name = "loop-control", + .fops = &loop_ctl_fops, +}; + +MODULE_ALIAS_MISCDEV(LOOP_CTRL_MINOR); +MODULE_ALIAS("devname:loop-control"); + static int __init loop_init(void) { int i, nr; unsigned long range; struct loop_device *lo; + int err; /* * loop module now has a feature to instantiate underlying device @@ -1702,6 +1808,10 @@ static int __init loop_init(void) * device on-demand. */ + err = misc_register(&loop_misc); + if (err < 0) + return err; + part_shift = 0; if (max_part > 0) { part_shift = fls(max_part); @@ -1767,6 +1877,8 @@ static void __exit loop_exit(void) blk_unregister_region(MKDEV(LOOP_MAJOR, 0), range); unregister_blkdev(LOOP_MAJOR, "loop"); + + misc_deregister(&loop_misc); } module_init(loop_init); diff --git a/include/linux/loop.h b/include/linux/loop.h index 5f08d18fa148..683d69890119 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -160,4 +160,8 @@ int loop_unregister_transfer(int number); #define LOOP_CHANGE_FD 0x4C06 #define LOOP_SET_CAPACITY 0x4C07 +/* /dev/loop-control interface */ +#define LOOP_CTL_ADD 0x4C80 +#define LOOP_CTL_REMOVE 0x4C81 +#define LOOP_CTL_GET_FREE 0x4C82 #endif diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 18fd13028ba1..c309b1ecdc1c 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -40,6 +40,7 @@ #define BTRFS_MINOR 234 #define AUTOFS_MINOR 235 #define MAPPER_CTRL_MINOR 236 +#define LOOP_CTRL_MINOR 237 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From d134b00b9acca3fb054d7c88a5f5d562ecbb42d1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:08:04 +0200 Subject: loop: add BLK_DEV_LOOP_MIN_COUNT=%i to allow distros 0 pre-allocated loop devices Instead of unconditionally creating a fixed number of dead loop devices which need to be investigated by storage handling services, even when they are never used, we allow distros start with 0 loop devices and have losetup(8) and similar switch to the dynamic /dev/loop-control interface instead of searching /dev/loop%i for free devices. Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- Documentation/kernel-parameters.txt | 9 ++++++--- drivers/block/Kconfig | 15 +++++++++++++++ drivers/block/loop.c | 27 ++++++++++----------------- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4ca93898fbd3..c32851131646 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1340,9 +1340,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. it is equivalent to "nosmp", which also disables the IO APIC. - max_loop= [LOOP] Maximum number of loopback devices that can - be mounted - Format: <1-256> + max_loop= [LOOP] The number of loop block devices that get + (loop.max_loop) unconditionally pre-created at init time. The default + number is configured by BLK_DEV_LOOP_MIN_COUNT. Instead + of statically allocating a predefined number, loop + devices can be requested on-demand with the + /dev/loop-control interface. mcatest= [IA-64] diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 717d6e4e18d3..57212c5235e2 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -256,6 +256,21 @@ config BLK_DEV_LOOP Most users will answer N here. +config BLK_DEV_LOOP_MIN_COUNT + int "Number of loop devices to pre-create at init time" + depends on BLK_DEV_LOOP + default 8 + help + Static number of loop devices to be unconditionally pre-created + at init time. + + This default value can be overwritten on the kernel command + line or with module-parameter loop.max_loop. + + The historic default is 8. If a late 2011 version of losetup(8) + is used, it can be set to 0, since needed loop devices can be + dynamically allocated with the /dev/loop-control interface. + config BLK_DEV_CRYPTOLOOP tristate "Cryptoloop Support" select CRYPTO diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 5c9edf944879..3defc52f060c 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1793,21 +1793,6 @@ static int __init loop_init(void) struct loop_device *lo; int err; - /* - * loop module now has a feature to instantiate underlying device - * structure on-demand, provided that there is an access dev node. - * However, this will not work well with user space tool that doesn't - * know about such "feature". In order to not break any existing - * tool, we do the following: - * - * (1) if max_loop is specified, create that many upfront, and this - * also becomes a hard limit. - * (2) if max_loop is not specified, create 8 loop device on module - * load, user can further extend loop device by create dev node - * themselves and have kernel automatically instantiate actual - * device on-demand. - */ - err = misc_register(&loop_misc); if (err < 0) return err; @@ -1833,11 +1818,19 @@ static int __init loop_init(void) if (max_loop > 1UL << (MINORBITS - part_shift)) return -EINVAL; + /* + * If max_loop is specified, create that many devices upfront. + * This also becomes a hard limit. If max_loop is not specified, + * create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module + * init time. Loop devices can be requested on-demand with the + * /dev/loop-control interface, or be instantiated by accessing + * a 'dead' device node. + */ if (max_loop) { nr = max_loop; range = max_loop << part_shift; } else { - nr = 8; + nr = CONFIG_BLK_DEV_LOOP_MIN_COUNT; range = 1UL << MINORBITS; } @@ -1847,7 +1840,7 @@ static int __init loop_init(void) blk_register_region(MKDEV(LOOP_MAJOR, 0), range, THIS_MODULE, loop_probe, NULL, NULL); - /* pre-create number devices of devices given by config or max_loop */ + /* pre-create number of devices given by config or max_loop */ mutex_lock(&loop_index_mutex); for (i = 0; i < nr; i++) loop_add(&lo, i); -- cgit v1.2.3 From 05eb0f252b04aa94ace0794f73d56c6a02351d80 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2011 22:21:35 +0200 Subject: loop: fix deadlock when sysfs and LOOP_CLR_FD race against each other LOOP_CLR_FD takes lo->lo_ctl_mutex and tries to remove the loop sysfs files. Sysfs calls show() and waits for lo->lo_ctl_mutex. LOOP_CLR_FD waits for show() to finish to remove the sysfs file. cat /sys/class/block/loop0/loop/backing_file mutex_lock_nested+0x176/0x350 ? loop_attr_do_show_backing_file+0x2f/0xd0 [loop] ? loop_attr_do_show_backing_file+0x2f/0xd0 [loop] loop_attr_do_show_backing_file+0x2f/0xd0 [loop] dev_attr_show+0x1b/0x60 ? sysfs_read_file+0x86/0x1a0 ? __get_free_pages+0x12/0x50 sysfs_read_file+0xaf/0x1a0 ioctl(LOOP_CLR_FD): wait_for_common+0x12c/0x180 ? try_to_wake_up+0x2a0/0x2a0 wait_for_completion+0x18/0x20 sysfs_deactivate+0x178/0x180 ? sysfs_addrm_finish+0x43/0x70 ? sysfs_addrm_start+0x1d/0x20 sysfs_addrm_finish+0x43/0x70 sysfs_hash_and_remove+0x85/0xa0 sysfs_remove_group+0x59/0x100 loop_clr_fd+0x1dc/0x3f0 [loop] lo_ioctl+0x223/0x7a0 [loop] Instead of taking the lo_ctl_mutex from sysfs code, take the inner lo->lo_lock, to protect the access to the backing_file data. Thanks to Tejun for help debugging and finding a solution. Cc: Milan Broz Cc: Tejun Heo Signed-off-by: Kay Sievers Cc: stable@kernel.org Signed-off-by: Jens Axboe --- drivers/block/loop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 3defc52f060c..4720c7ade0ae 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -743,10 +743,10 @@ static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf) ssize_t ret; char *p = NULL; - mutex_lock(&lo->lo_ctl_mutex); + spin_lock_irq(&lo->lo_lock); if (lo->lo_backing_file) p = d_path(&lo->lo_backing_file->f_path, buf, PAGE_SIZE - 1); - mutex_unlock(&lo->lo_ctl_mutex); + spin_unlock_irq(&lo->lo_lock); if (IS_ERR_OR_NULL(p)) ret = PTR_ERR(p); @@ -1000,7 +1000,9 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) kthread_stop(lo->lo_thread); + spin_lock_irq(&lo->lo_lock); lo->lo_backing_file = NULL; + spin_unlock_irq(&lo->lo_lock); loop_release_xfer(lo); lo->transfer = NULL; -- cgit v1.2.3 From e5a94f56845bb4b272d82e84b5a1e2080b07ba82 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 1 Aug 2011 10:31:06 +0200 Subject: blk-throttle: correctly determine sync bio read request is always sync. Using rw_is_sync() to determine if a bio is sync. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f6a794120505..a19f58c6fc3a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = bio->bi_rw & REQ_SYNC; + bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; @@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, bio->bi_rw & REQ_SYNC); + rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); return 0; } -- cgit v1.2.3 From 1c8007b0769d37aa5fcb343b383b0af89ade2f71 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Mon, 1 Aug 2011 12:41:00 -0500 Subject: jfs: flush journal completely before releasing metadata inodes This fixes a race during unmount. We need to not only make sure that the journal is completely written, but that the metadata changes make it to disk before releasing ipimap and ipbmap. Signed-off-by: Dave Kleikamp --- fs/jfs/jfs_umount.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c index adcf92d3b603..7971f37534a3 100644 --- a/fs/jfs/jfs_umount.c +++ b/fs/jfs/jfs_umount.c @@ -68,7 +68,7 @@ int jfs_umount(struct super_block *sb) /* * Wait for outstanding transactions to be written to log: */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * close fileset inode allocation map (aka fileset inode) @@ -146,7 +146,7 @@ int jfs_umount_rw(struct super_block *sb) * * remove file system from log active file system list. */ - jfs_flush_journal(log, 1); + jfs_flush_journal(log, 2); /* * Make sure all metadata makes it to disk -- cgit v1.2.3 From b03e7495a862b028294f59fc87286d6d78ee7fa1 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Wed, 20 Jul 2011 15:20:54 -0500 Subject: PCI: Set PCI-E Max Payload Size on fabric On a given PCI-E fabric, each device, bridge, and root port can have a different PCI-E maximum payload size. There is a sizable performance boost for having the largest possible maximum payload size on each PCI-E device. However, if improperly configured, fatal bus errors can occur. Thus, it is important to ensure that PCI-E payloads sends by a device are never larger than the MPS setting of all devices on the way to the destination. This can be achieved two ways: - A conservative approach is to use the smallest common denominator of the entire tree below a root complex for every device on that fabric. This means for example that having a 128 bytes MPS USB controller on one leg of a switch will dramatically reduce performances of a video card or 10GE adapter on another leg of that same switch. It also means that any hierarchy supporting hotplug slots (including expresscard or thunderbolt I suppose, dbl check that) will have to be entirely clamped to 128 bytes since we cannot predict what will be plugged into those slots, and we cannot change the MPS on a "live" system. - A more optimal way is possible, if it falls within a couple of constraints: * The top-level host bridge will never generate packets larger than the smallest TLP (or if it can be controlled independently from its MPS at least) * The device will never generate packets larger than MPS (which can be configured via MRRS) * No support of direct PCI-E <-> PCI-E transfers between devices without some additional code to specifically deal with that case Then we can use an approach that basically ignores downstream requests and focuses exclusively on upstream requests. In that case, all we need to care about is that a device MPS is no larger than its parent MPS, which allows us to keep all switches/bridges to the max MPS supported by their parent and eventually the PHB. In this case, your USB controller would no longer "starve" your 10GE Ethernet and your hotplug slots won't affect your global MPS. Additionally, the hotplugged devices themselves can be configured to a larger MPS up to the value configured in the hotplug bridge. To choose between the two available options, two PCI kernel boot args have been added to the PCI calls. "pcie_bus_safe" will provide the former behavior, while "pcie_bus_perf" will perform the latter behavior. By default, the latter behavior is used. NOTE: due to the location of the enablement, each arch will need to add calls to this function. This patch only enables x86. This patch includes a number of changes recommended by Benjamin Herrenschmidt. Tested-by: Jordan_Hargrave@dell.com Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 9 +++ drivers/pci/hotplug/pcihp_slot.c | 45 +----------- drivers/pci/pci.c | 67 ++++++++++++++++++ drivers/pci/probe.c | 145 +++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 15 +++- 5 files changed, 236 insertions(+), 45 deletions(-) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index ae3cb23cd89b..c95330267f08 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -360,6 +360,15 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root) } } + /* After the PCI-E bus has been walked and all devices discovered, + * configure any settings of the fabric that might be necessary. + */ + if (bus) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child, child->self->pcie_mpss); + } + if (!bus) kfree(sd); diff --git a/drivers/pci/hotplug/pcihp_slot.c b/drivers/pci/hotplug/pcihp_slot.c index 749fdf070319..753b21aaea61 100644 --- a/drivers/pci/hotplug/pcihp_slot.c +++ b/drivers/pci/hotplug/pcihp_slot.c @@ -158,47 +158,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -/* Program PCIE MaxPayload setting on device: ensure parent maxpayload <= device */ -static int pci_set_payload(struct pci_dev *dev) -{ - int pos, ppos; - u16 pctl, psz; - u16 dctl, dsz, dcap, dmax; - struct pci_dev *parent; - - parent = dev->bus->self; - pos = pci_find_capability(dev, PCI_CAP_ID_EXP); - if (!pos) - return 0; - - /* Read Device MaxPayload capability and setting */ - pci_read_config_word(dev, pos + PCI_EXP_DEVCTL, &dctl); - pci_read_config_word(dev, pos + PCI_EXP_DEVCAP, &dcap); - dsz = (dctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - dmax = (dcap & PCI_EXP_DEVCAP_PAYLOAD); - - /* Read Parent MaxPayload setting */ - ppos = pci_find_capability(parent, PCI_CAP_ID_EXP); - if (!ppos) - return 0; - pci_read_config_word(parent, ppos + PCI_EXP_DEVCTL, &pctl); - psz = (pctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5; - - /* If parent payload > device max payload -> error - * If parent payload > device payload -> set speed - * If parent payload <= device payload -> do nothing - */ - if (psz > dmax) - return -1; - else if (psz > dsz) { - dev_info(&dev->dev, "Setting MaxPayload to %d\n", 128 << psz); - pci_write_config_word(dev, pos + PCI_EXP_DEVCTL, - (dctl & ~PCI_EXP_DEVCTL_PAYLOAD) + - (psz << 5)); - } - return 0; -} - void pci_configure_slot(struct pci_dev *dev) { struct pci_dev *cdev; @@ -210,9 +169,7 @@ void pci_configure_slot(struct pci_dev *dev) (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) return; - ret = pci_set_payload(dev); - if (ret) - dev_warn(&dev->dev, "could not set device max payload\n"); + pcie_bus_configure_settings(dev->bus, dev->bus->self->pcie_mpss); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 08a95b369d85..466fad6e6ee2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -77,6 +77,8 @@ unsigned long pci_cardbus_mem_size = DEFAULT_CARDBUS_MEM_SIZE; unsigned long pci_hotplug_io_size = DEFAULT_HOTPLUG_IO_SIZE; unsigned long pci_hotplug_mem_size = DEFAULT_HOTPLUG_MEM_SIZE; +enum pcie_bus_config_types pcie_bus_config = PCIE_BUS_PERFORMANCE; + /* * The default CLS is used if arch didn't set CLS explicitly and not * all pci devices agree on the same value. Arch can override either @@ -3222,6 +3224,67 @@ out: } EXPORT_SYMBOL(pcie_set_readrq); +/** + * pcie_get_mps - get PCI Express maximum payload size + * @dev: PCI device to query + * + * Returns maximum payload size in bytes + * or appropriate error value. + */ +int pcie_get_mps(struct pci_dev *dev) +{ + int ret, cap; + u16 ctl; + + cap = pci_pcie_cap(dev); + if (!cap) + return -EINVAL; + + ret = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (!ret) + ret = 128 << ((ctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5); + + return ret; +} + +/** + * pcie_set_mps - set PCI Express maximum payload size + * @dev: PCI device to query + * @rq: maximum payload size in bytes + * valid values are 128, 256, 512, 1024, 2048, 4096 + * + * If possible sets maximum payload size + */ +int pcie_set_mps(struct pci_dev *dev, int mps) +{ + int cap, err = -EINVAL; + u16 ctl, v; + + if (mps < 128 || mps > 4096 || !is_power_of_2(mps)) + goto out; + + v = ffs(mps) - 8; + if (v > dev->pcie_mpss) + goto out; + v <<= 5; + + cap = pci_pcie_cap(dev); + if (!cap) + goto out; + + err = pci_read_config_word(dev, cap + PCI_EXP_DEVCTL, &ctl); + if (err) + goto out; + + if ((ctl & PCI_EXP_DEVCTL_PAYLOAD) != v) { + ctl &= ~PCI_EXP_DEVCTL_PAYLOAD; + ctl |= v; + err = pci_write_config_word(dev, cap + PCI_EXP_DEVCTL, ctl); + } +out: + return err; +} + /** * pci_select_bars - Make BAR mask from the type of resource * @dev: the PCI device for which BAR mask is made @@ -3505,6 +3568,10 @@ static int __init pci_setup(char *str) pci_hotplug_io_size = memparse(str + 9, &str); } else if (!strncmp(str, "hpmemsize=", 10)) { pci_hotplug_mem_size = memparse(str + 10, &str); + } else if (!strncmp(str, "pcie_bus_safe", 13)) { + pcie_bus_config = PCIE_BUS_SAFE; + } else if (!strncmp(str, "pcie_bus_perf", 13)) { + pcie_bus_config = PCIE_BUS_PERFORMANCE; } else { printk(KERN_ERR "PCI: Unknown option `%s'\n", str); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 795c9026d55f..5becf7cd50d8 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -856,6 +856,8 @@ void set_pcie_port_type(struct pci_dev *pdev) pdev->pcie_cap = pos; pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); pdev->pcie_type = (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; + pci_read_config_word(pdev, pos + PCI_EXP_DEVCAP, ®16); + pdev->pcie_mpss = reg16 & PCI_EXP_DEVCAP_PAYLOAD; } void set_pcie_hotplug_bridge(struct pci_dev *pdev) @@ -1326,6 +1328,149 @@ int pci_scan_slot(struct pci_bus *bus, int devfn) return nr; } +static int pcie_find_smpss(struct pci_dev *dev, void *data) +{ + u8 *smpss = data; + + if (!pci_is_pcie(dev)) + return 0; + + /* For PCIE hotplug enabled slots not connected directly to a + * PCI-E root port, there can be problems when hotplugging + * devices. This is due to the possibility of hotplugging a + * device into the fabric with a smaller MPS that the devices + * currently running have configured. Modifying the MPS on the + * running devices could cause a fatal bus error due to an + * incoming frame being larger than the newly configured MPS. + * To work around this, the MPS for the entire fabric must be + * set to the minimum size. Any devices hotplugged into this + * fabric will have the minimum MPS set. If the PCI hotplug + * slot is directly connected to the root port and there are not + * other devices on the fabric (which seems to be the most + * common case), then this is not an issue and MPS discovery + * will occur as normal. + */ + if (dev->is_hotplug_bridge && (!list_is_singular(&dev->bus->devices) || + dev->bus->self->pcie_type != PCI_EXP_TYPE_ROOT_PORT)) + *smpss = 0; + + if (*smpss > dev->pcie_mpss) + *smpss = dev->pcie_mpss; + + return 0; +} + +static void pcie_write_mps(struct pci_dev *dev, int mps) +{ + int rc, dev_mpss; + + dev_mpss = 128 << dev->pcie_mpss; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + if (dev->bus->self) { + dev_dbg(&dev->bus->dev, "Bus MPSS %d\n", + 128 << dev->bus->self->pcie_mpss); + + /* For "MPS Force Max", the assumption is made that + * downstream communication will never be larger than + * the MRRS. So, the MPS only needs to be configured + * for the upstream communication. This being the case, + * walk from the top down and set the MPS of the child + * to that of the parent bus. + */ + mps = 128 << dev->bus->self->pcie_mpss; + if (mps > dev_mpss) + dev_warn(&dev->dev, "MPS configured higher than" + " maximum supported by the device. If" + " a bus issue occurs, try running with" + " pci=pcie_bus_safe.\n"); + } + + dev->pcie_mpss = ffs(mps) - 8; + } + + rc = pcie_set_mps(dev, mps); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MPS\n"); +} + +static void pcie_write_mrrs(struct pci_dev *dev, int mps) +{ + int rc, mrrs; + + if (pcie_bus_config == PCIE_BUS_PERFORMANCE) { + int dev_mpss = 128 << dev->pcie_mpss; + + /* For Max performance, the MRRS must be set to the largest + * supported value. However, it cannot be configured larger + * than the MPS the device or the bus can support. This assumes + * that the largest MRRS available on the device cannot be + * smaller than the device MPSS. + */ + mrrs = mps < dev_mpss ? mps : dev_mpss; + } else + /* In the "safe" case, configure the MRRS for fairness on the + * bus by making all devices have the same size + */ + mrrs = mps; + + + /* MRRS is a R/W register. Invalid values can be written, but a + * subsiquent read will verify if the value is acceptable or not. + * If the MRRS value provided is not acceptable (e.g., too large), + * shrink the value until it is acceptable to the HW. + */ + while (mrrs != pcie_get_readrq(dev) && mrrs >= 128) { + rc = pcie_set_readrq(dev, mrrs); + if (rc) + dev_err(&dev->dev, "Failed attempting to set the MRRS\n"); + + mrrs /= 2; + } +} + +static int pcie_bus_configure_set(struct pci_dev *dev, void *data) +{ + int mps = 128 << *(u8 *)data; + + if (!pci_is_pcie(dev)) + return 0; + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + pcie_write_mps(dev, mps); + pcie_write_mrrs(dev, mps); + + dev_info(&dev->dev, "Dev MPS %d MPSS %d MRRS %d\n", + pcie_get_mps(dev), 128<pcie_mpss, pcie_get_readrq(dev)); + + return 0; +} + +/* pcie_bus_configure_mps requires that pci_walk_bus work in a top-down, + * parents then children fashion. If this changes, then this code will not + * work as designed. + */ +void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) +{ + u8 smpss = mpss; + + if (!bus->self) + return; + + if (!pci_is_pcie(bus->self)) + return; + + if (pcie_bus_config == PCIE_BUS_SAFE) { + pcie_find_smpss(bus->self, &smpss); + pci_walk_bus(bus, pcie_find_smpss, &smpss); + } + + pcie_bus_configure_set(bus->self, &smpss); + pci_walk_bus(bus, pcie_bus_configure_set, &smpss); +} + unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { unsigned int devfn, pass, max = bus->secondary; diff --git a/include/linux/pci.h b/include/linux/pci.h index f27893b3b724..1ff9bbafd932 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -251,7 +251,8 @@ struct pci_dev { u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ u8 pcie_cap; /* PCI-E capability offset */ - u8 pcie_type; /* PCI-E device/port type */ + u8 pcie_type:4; /* PCI-E device/port type */ + u8 pcie_mpss:3; /* PCI-E Max Payload Size Supported */ u8 rom_base_reg; /* which config register controls the ROM */ u8 pin; /* which interrupt pin this device uses */ @@ -617,6 +618,16 @@ struct pci_driver { /* these external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI +extern void pcie_bus_configure_settings(struct pci_bus *bus, u8 smpss); + +enum pcie_bus_config_types { + PCIE_BUS_PERFORMANCE, + PCIE_BUS_SAFE, + PCIE_BUS_PEER2PEER, +}; + +extern enum pcie_bus_config_types pcie_bus_config; + extern struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch specific pci @@ -796,6 +807,8 @@ int pcix_get_mmrbc(struct pci_dev *dev); int pcix_set_mmrbc(struct pci_dev *dev, int mmrbc); int pcie_get_readrq(struct pci_dev *dev); int pcie_set_readrq(struct pci_dev *dev, int rq); +int pcie_get_mps(struct pci_dev *dev); +int pcie_set_mps(struct pci_dev *dev, int mps); int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); -- cgit v1.2.3 From be768912a49b10b68e96fbd8fa3cab0adfbd3091 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Jul 2011 13:08:38 -0700 Subject: PCI: honor child buses add_size in hot plug configuration git commit c8adf9a3e873eddaaec11ac410a99ef6b9656938 "PCI: pre-allocate additional resources to devices only after successful allocation of essential resources." fails to take into consideration the optional-resources needed by children devices while calculating the optional-resource needed by the bridge. This can be a problem on some setup. For example, if a hotplug bridge has 8 children hotplug bridges, the bridge should have enough resources to accomodate the hotplug requirements for each of its children hotplug bridges. Currently this is not the case. This patch fixes the problem. Signed-off-by: Yinghai Lu Reviewed-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 8a1d3c7863a8..4409cd0e15fa 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -540,6 +540,20 @@ static resource_size_t calculate_memsize(resource_size_t size, return size; } +static resource_size_t get_res_add_size(struct resource_list_x *add_head, + struct resource *res) +{ + struct resource_list_x *list; + + /* check if it is in add_head list */ + for (list = add_head->next; list && list->res != res; + list = list->next); + if (list) + return list->add_size; + + return 0; +} + /** * pbus_size_io() - size the io window of a given bus * @@ -559,6 +573,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, struct pci_dev *dev; struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); unsigned long size = 0, size0 = 0, size1 = 0; + resource_size_t children_add_size = 0; if (!b_res) return; @@ -579,10 +594,15 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, size += r_size; else size1 += r_size; + + if (add_head) + children_add_size += get_res_add_size(add_head, r); } } size0 = calculate_iosize(size, min_size, size1, resource_size(b_res), 4096); + if (children_add_size > add_size) + add_size = children_add_size; size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); @@ -624,6 +644,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, int order, max_order; struct resource *b_res = find_free_bus_resource(bus, type); unsigned int mem64_mask = 0; + resource_size_t children_add_size = 0; if (!b_res) return 0; @@ -665,6 +686,9 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (order > max_order) max_order = order; mem64_mask &= r->flags & IORESOURCE_MEM_64; + + if (add_head) + children_add_size += get_res_add_size(add_head, r); } } align = 0; @@ -681,6 +705,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, align += aligns[order]; } size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); + if (children_add_size > add_size) + add_size = children_add_size; size1 = (!add_head || (add_head && !add_size)) ? size0 : calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); -- cgit v1.2.3 From 2bbc6942273b5b3097bd265d82227bdd84b351b2 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:39 -0700 Subject: PCI : ability to relocate assigned pci-resources Currently pci-bridges are allocated enough resources to satisfy their immediate requirements. Any additional resource-requests fail if additional free space, contiguous to the one already allocated, is not available. This behavior is not reasonable since sufficient contiguous resources, that can satisfy the request, are available at a different location. This patch provides the ability to expand and relocate a allocated resource. v2: Changelog: Fixed size calculation in pci_reassign_resource() v3: Changelog : Split this patch. The resource.c changes are already upstream. All the pci driver changes are in here. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 27 +++++---- drivers/pci/setup-res.c | 152 ++++++++++++++++++++++++++++++------------------ include/linux/pci.h | 1 + 3 files changed, 113 insertions(+), 67 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 4409cd0e15fa..1796c6ffe91c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -34,6 +34,7 @@ struct resource_list_x { resource_size_t start; resource_size_t end; resource_size_t add_size; + resource_size_t min_align; unsigned long flags; }; @@ -65,7 +66,7 @@ void pci_realloc(void) */ static void add_to_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res, - resource_size_t add_size) + resource_size_t add_size, resource_size_t min_align) { struct resource_list_x *list = head; struct resource_list_x *ln = list->next; @@ -84,13 +85,16 @@ static void add_to_list(struct resource_list_x *head, tmp->end = res->end; tmp->flags = res->flags; tmp->add_size = add_size; + tmp->min_align = min_align; list->next = tmp; } static void add_to_failed_list(struct resource_list_x *head, struct pci_dev *dev, struct resource *res) { - add_to_list(head, dev, res, 0); + add_to_list(head, dev, res, + 0 /* dont care */, + 0 /* dont care */); } static void __dev_sort_resources(struct pci_dev *dev, @@ -159,13 +163,16 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; - if (!resource_size(res) && add_size) { - res->end = res->start + add_size - 1; - if(pci_assign_resource(list->dev, idx)) + if (!resource_size(res)) { + res->end = res->start + add_size - 1; + if(pci_assign_resource(list->dev, idx)) reset_resource(res); - } else if (add_size) { - adjust_resource(res, res->start, - resource_size(res) + add_size); + } else { + resource_size_t align = list->min_align; + res->flags |= list->flags & (IORESOURCE_STARTALIGN|IORESOURCE_SIZEALIGN); + if (pci_reassign_resource(list->dev, idx, add_size, align)) + dev_printk(KERN_DEBUG, &list->dev->dev, "failed to add optional resources res=%pR\n", + res); } out: tmp = list; @@ -619,7 +626,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, 4096); } /** @@ -722,7 +729,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0); + add_to_list(add_head, bus->self, b_res, size1-size0, min_align); return 1; } diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 319f359906e8..51a9095c7da4 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -128,16 +128,16 @@ void pci_disable_bridge_window(struct pci_dev *dev) } #endif /* CONFIG_PCI_QUIRKS */ + + static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, - int resno) + int resno, resource_size_t size, resource_size_t align) { struct resource *res = dev->resource + resno; - resource_size_t size, min, align; + resource_size_t min; int ret; - size = resource_size(res); min = (res->flags & IORESOURCE_IO) ? PCIBIOS_MIN_IO : PCIBIOS_MIN_MEM; - align = pci_resource_alignment(dev, res); /* First, try exact prefetching match.. */ ret = pci_bus_alloc_resource(bus, res, size, align, min, @@ -154,56 +154,101 @@ static int __pci_assign_resource(struct pci_bus *bus, struct pci_dev *dev, ret = pci_bus_alloc_resource(bus, res, size, align, min, 0, pcibios_align_resource, dev); } + return ret; +} - if (ret < 0 && dev->fw_addr[resno]) { - struct resource *root, *conflict; - resource_size_t start, end; +static int pci_revert_fw_address(struct resource *res, struct pci_dev *dev, + int resno, resource_size_t size) +{ + struct resource *root, *conflict; + resource_size_t start, end; + int ret = 0; - /* - * If we failed to assign anything, let's try the address - * where firmware left it. That at least has a chance of - * working, which is better than just leaving it disabled. - */ + if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + else + root = &iomem_resource; + + start = res->start; + end = res->end; + res->start = dev->fw_addr[resno]; + res->end = res->start + size - 1; + dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", + resno, res); + conflict = request_resource_conflict(root, res); + if (conflict) { + dev_info(&dev->dev, + "BAR %d: %pR conflicts with %s %pR\n", resno, + res, conflict->name, conflict); + res->start = start; + res->end = end; + ret = 1; + } + return ret; +} + +static int _pci_assign_resource(struct pci_dev *dev, int resno, int size, resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + struct pci_bus *bus; + int ret; + char *type; - if (res->flags & IORESOURCE_IO) - root = &ioport_resource; + bus = dev->bus; + while ((ret = __pci_assign_resource(bus, dev, resno, size, min_align))) { + if (!bus->parent || !bus->self->transparent) + break; + bus = bus->parent; + } + + if (ret) { + if (res->flags & IORESOURCE_MEM) + if (res->flags & IORESOURCE_PREFETCH) + type = "mem pref"; + else + type = "mem"; + else if (res->flags & IORESOURCE_IO) + type = "io"; else - root = &iomem_resource; - - start = res->start; - end = res->end; - res->start = dev->fw_addr[resno]; - res->end = res->start + size - 1; - dev_info(&dev->dev, "BAR %d: trying firmware assignment %pR\n", - resno, res); - conflict = request_resource_conflict(root, res); - if (conflict) { - dev_info(&dev->dev, - "BAR %d: %pR conflicts with %s %pR\n", resno, - res, conflict->name, conflict); - res->start = start; - res->end = end; - } else - ret = 0; + type = "unknown"; + dev_info(&dev->dev, + "BAR %d: can't assign %s (size %#llx)\n", + resno, type, (unsigned long long) resource_size(res)); } + return ret; +} + +int pci_reassign_resource(struct pci_dev *dev, int resno, resource_size_t addsize, + resource_size_t min_align) +{ + struct resource *res = dev->resource + resno; + resource_size_t new_size; + int ret; + + if (!res->parent) { + dev_info(&dev->dev, "BAR %d: can't reassign an unassigned resouce %pR " + "\n", resno, res); + return -EINVAL; + } + + new_size = resource_size(res) + addsize + min_align; + ret = _pci_assign_resource(dev, resno, new_size, min_align); if (!ret) { res->flags &= ~IORESOURCE_STARTALIGN; dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); if (resno < PCI_BRIDGE_RESOURCES) pci_update_resource(dev, resno); } - return ret; } int pci_assign_resource(struct pci_dev *dev, int resno) { struct resource *res = dev->resource + resno; - resource_size_t align; + resource_size_t align, size; struct pci_bus *bus; int ret; - char *type; align = pci_resource_alignment(dev, res); if (!align) { @@ -213,34 +258,27 @@ int pci_assign_resource(struct pci_dev *dev, int resno) } bus = dev->bus; - while ((ret = __pci_assign_resource(bus, dev, resno))) { - if (bus->parent && bus->self->transparent) - bus = bus->parent; - else - bus = NULL; - if (bus) - continue; - break; - } + size = resource_size(res); + ret = _pci_assign_resource(dev, resno, size, align); - if (ret) { - if (res->flags & IORESOURCE_MEM) - if (res->flags & IORESOURCE_PREFETCH) - type = "mem pref"; - else - type = "mem"; - else if (res->flags & IORESOURCE_IO) - type = "io"; - else - type = "unknown"; - dev_info(&dev->dev, - "BAR %d: can't assign %s (size %#llx)\n", - resno, type, (unsigned long long) resource_size(res)); - } + /* + * If we failed to assign anything, let's try the address + * where firmware left it. That at least has a chance of + * working, which is better than just leaving it disabled. + */ + if (ret < 0 && dev->fw_addr[resno]) + ret = pci_revert_fw_address(res, dev, resno, size); + if (!ret) { + res->flags &= ~IORESOURCE_STARTALIGN; + dev_info(&dev->dev, "BAR %d: assigned %pR\n", resno, res); + if (resno < PCI_BRIDGE_RESOURCES) + pci_update_resource(dev, resno); + } return ret; } + /* Sort resources by alignment */ void pdev_sort_resources(struct pci_dev *dev, struct resource_list *head) { diff --git a/include/linux/pci.h b/include/linux/pci.h index 1ff9bbafd932..8c230cbcbb48 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -813,6 +813,7 @@ int __pci_reset_function(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); +int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); int pci_select_bars(struct pci_dev *dev, unsigned long flags); /* ROM control related routines */ -- cgit v1.2.3 From 2aceefcbd5a73059e5f52831817ec277e987440d Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Jul 2011 13:08:40 -0700 Subject: PCI: make SRIOV resources optional From: Yinghai Lu Allocate resources to SRIOV BARs only after all other required resource-requests are satisfied. Dont retry if resource allocation for SRIOV BARs fail. Signed-off-by: Ram Pai Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 1796c6ffe91c..1c19b9f4019a 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -673,6 +673,16 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (r->parent || (r->flags & mask) != type) continue; r_size = resource_size(r); +#ifdef CONFIG_PCI_IOV + /* put SRIOV requested res to the optional list */ + if (add_head && i >= PCI_IOV_RESOURCES && + i <= PCI_IOV_RESOURCE_END) { + r->end = r->start - 1; + add_to_list(add_head, dev, r, r_size, 1); + children_add_size += r_size; + continue; + } +#endif /* For bridges size != alignment */ align = pci_resource_alignment(dev, r); order = __ffs(align) - 20; -- cgit v1.2.3 From 0a2daa1cf35004f5adbf4138555cc5669abf3a3e Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:41 -0700 Subject: PCI: make cardbus-bridge resources optional Allocate resources to cardbus bridge only after all other genuine resources requests are satisfied. Dont retry if resource allocation for cardbus-bridges fail. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 4 ++++ drivers/pci/setup-bus.c | 41 ++++++++++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index c8cee764b0de..b74084e9ca12 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -283,6 +283,8 @@ static inline int pci_iov_bus_range(struct pci_bus *bus) #endif /* CONFIG_PCI_IOV */ +extern unsigned long pci_cardbus_resource_alignment(struct resource *); + static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, struct resource *res) { @@ -292,6 +294,8 @@ static inline resource_size_t pci_resource_alignment(struct pci_dev *dev, if (resno >= PCI_IOV_RESOURCES && resno <= PCI_IOV_RESOURCE_END) return pci_sriov_resource_alignment(dev, resno); #endif + if (dev->class >> 8 == PCI_CLASS_BRIDGE_CARDBUS) + return pci_cardbus_resource_alignment(res); return resource_alignment(res); } diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 1c19b9f4019a..29e7cc73537c 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -164,6 +164,7 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, idx = res - &list->dev->resource[0]; add_size=list->add_size; if (!resource_size(res)) { + res->start = list->start; res->end = res->start + add_size - 1; if(pci_assign_resource(list->dev, idx)) reset_resource(res); @@ -223,7 +224,7 @@ static void __assign_resources_sorted(struct resource_list *head, /* Satisfy the must-have resource requests */ assign_requested_resources_sorted(head, fail_head); - /* Try to satisfy any additional nice-to-have resource + /* Try to satisfy any additional optional resource requests */ if (add_head) adjust_resources_sorted(add_head, head); @@ -678,7 +679,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, if (add_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(add_head, dev, r, r_size, 1); + add_to_list(add_head, dev, r, r_size, 0/* dont' care */); children_add_size += r_size; continue; } @@ -743,7 +744,17 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, return 1; } -static void pci_bus_size_cardbus(struct pci_bus *bus) +unsigned long pci_cardbus_resource_alignment(struct resource *res) +{ + if (res->flags & IORESOURCE_IO) + return pci_cardbus_io_size; + if (res->flags & IORESOURCE_MEM) + return pci_cardbus_mem_size; + return 0; +} + +static void pci_bus_size_cardbus(struct pci_bus *bus, + struct resource_list_x *add_head) { struct pci_dev *bridge = bus->self; struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; @@ -754,12 +765,14 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) * a fixed amount of bus space for CardBus bridges. */ b_res[0].start = 0; - b_res[0].end = pci_cardbus_io_size - 1; b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); b_res[1].start = 0; - b_res[1].end = pci_cardbus_io_size - 1; b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); /* * Check whether prefetchable memory is supported @@ -779,17 +792,27 @@ static void pci_bus_size_cardbus(struct pci_bus *bus) */ if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { b_res[2].start = 0; - b_res[2].end = pci_cardbus_mem_size - 1; b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); b_res[3].start = 0; - b_res[3].end = pci_cardbus_mem_size - 1; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); } else { b_res[3].start = 0; - b_res[3].end = pci_cardbus_mem_size * 2 - 1; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; + if (add_head) + add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); } + + /* set the size of the resource to zero, so that the resource does not + * get assigned during required-resource allocation cycle but gets assigned + * during the optional-resource allocation cycle. + */ + b_res[0].start = b_res[1].start = b_res[2].start = b_res[3].start = 1; + b_res[0].end = b_res[1].end = b_res[2].end = b_res[3].end = 0; } void __ref __pci_bus_size_bridges(struct pci_bus *bus, @@ -806,7 +829,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_CARDBUS: - pci_bus_size_cardbus(b); + pci_bus_size_cardbus(b, add_head); break; case PCI_CLASS_BRIDGE_PCI: -- cgit v1.2.3 From 9e8bf93a7f416a3fa8fb6d76177d90e67bd45496 Mon Sep 17 00:00:00 2001 From: Ram Pai Date: Mon, 25 Jul 2011 13:08:42 -0700 Subject: PCI: code and comments cleanup a) adjust_resource_sorted() is now called reassign_resource_sorted() b) nice-to-have is now called optional c) add_list is now called realloc_list. Signed-off-by: Ram Pai Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 110 ++++++++++++++++++++++++------------------------ 1 file changed, 55 insertions(+), 55 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 29e7cc73537c..784da9d36029 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -125,18 +125,18 @@ static inline void reset_resource(struct resource *res) } /** - * adjust_resources_sorted() - satisfy any additional resource requests + * reassign_resources_sorted() - satisfy any additional resource requests * - * @add_head : head of the list tracking requests requiring additional + * @realloc_head : head of the list tracking requests requiring additional * resources * @head : head of the list tracking requests with allocated * resources * - * Walk through each element of the add_head and try to procure + * Walk through each element of the realloc_head and try to procure * additional resources for the element, provided the element * is in the head list. */ -static void adjust_resources_sorted(struct resource_list_x *add_head, +static void reassign_resources_sorted(struct resource_list_x *realloc_head, struct resource_list *head) { struct resource *res; @@ -145,8 +145,8 @@ static void adjust_resources_sorted(struct resource_list_x *add_head, resource_size_t add_size; int idx; - prev = add_head; - for (list = add_head->next; list;) { + prev = realloc_head; + for (list = realloc_head->next; list;) { res = list->res; /* skip resource that has been reset */ if (!res->flags) @@ -218,7 +218,7 @@ static void assign_requested_resources_sorted(struct resource_list *head, } static void __assign_resources_sorted(struct resource_list *head, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { /* Satisfy the must-have resource requests */ @@ -226,8 +226,8 @@ static void __assign_resources_sorted(struct resource_list *head, /* Try to satisfy any additional optional resource requests */ - if (add_head) - adjust_resources_sorted(add_head, head); + if (realloc_head) + reassign_resources_sorted(realloc_head, head); free_list(resource_list, head); } @@ -243,7 +243,7 @@ static void pdev_assign_resources_sorted(struct pci_dev *dev, } static void pbus_assign_resources_sorted(const struct pci_bus *bus, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { struct pci_dev *dev; @@ -253,7 +253,7 @@ static void pbus_assign_resources_sorted(const struct pci_bus *bus, list_for_each_entry(dev, &bus->devices, bus_list) __dev_sort_resources(dev, &head); - __assign_resources_sorted(&head, add_head, fail_head); + __assign_resources_sorted(&head, realloc_head, fail_head); } void pci_setup_cardbus(struct pci_bus *bus) @@ -548,13 +548,13 @@ static resource_size_t calculate_memsize(resource_size_t size, return size; } -static resource_size_t get_res_add_size(struct resource_list_x *add_head, +static resource_size_t get_res_add_size(struct resource_list_x *realloc_head, struct resource *res) { struct resource_list_x *list; - /* check if it is in add_head list */ - for (list = add_head->next; list && list->res != res; + /* check if it is in realloc_head list */ + for (list = realloc_head->next; list && list->res != res; list = list->next); if (list) return list->add_size; @@ -568,7 +568,7 @@ static resource_size_t get_res_add_size(struct resource_list_x *add_head, * @bus : the bus * @min_size : the minimum io window that must to be allocated * @add_size : additional optional io window - * @add_head : track the additional io window on this list + * @realloc_head : track the additional io window on this list * * Sizing the IO windows of the PCI-PCI bridge is trivial, * since these windows have 4K granularity and the IO ranges @@ -576,7 +576,7 @@ static resource_size_t get_res_add_size(struct resource_list_x *add_head, * We must be careful with the ISA aliasing though. */ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, - resource_size_t add_size, struct resource_list_x *add_head) + resource_size_t add_size, struct resource_list_x *realloc_head) { struct pci_dev *dev; struct resource *b_res = find_free_bus_resource(bus, IORESOURCE_IO); @@ -603,15 +603,15 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, else size1 += r_size; - if (add_head) - children_add_size += get_res_add_size(add_head, r); + if (realloc_head) + children_add_size += get_res_add_size(realloc_head, r); } } size0 = calculate_iosize(size, min_size, size1, resource_size(b_res), 4096); if (children_add_size > add_size) add_size = children_add_size; - size1 = (!add_head || (add_head && !add_size)) ? size0 : + size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : calculate_iosize(size, min_size+add_size, size1, resource_size(b_res), 4096); if (!size0 && !size1) { @@ -626,8 +626,8 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, b_res->start = 4096; b_res->end = b_res->start + size0 - 1; b_res->flags |= IORESOURCE_STARTALIGN; - if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0, 4096); + if (size1 > size0 && realloc_head) + add_to_list(realloc_head, bus->self, b_res, size1-size0, 4096); } /** @@ -636,7 +636,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, * @bus : the bus * @min_size : the minimum memory window that must to be allocated * @add_size : additional optional memory window - * @add_head : track the additional memory window on this list + * @realloc_head : track the additional memory window on this list * * Calculate the size of the bus and minimal alignment which * guarantees that all child resources fit in this size. @@ -644,7 +644,7 @@ static void pbus_size_io(struct pci_bus *bus, resource_size_t min_size, static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, unsigned long type, resource_size_t min_size, resource_size_t add_size, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; resource_size_t min_align, align, size, size0, size1; @@ -676,10 +676,10 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, r_size = resource_size(r); #ifdef CONFIG_PCI_IOV /* put SRIOV requested res to the optional list */ - if (add_head && i >= PCI_IOV_RESOURCES && + if (realloc_head && i >= PCI_IOV_RESOURCES && i <= PCI_IOV_RESOURCE_END) { r->end = r->start - 1; - add_to_list(add_head, dev, r, r_size, 0/* dont' care */); + add_to_list(realloc_head, dev, r, r_size, 0/* dont' care */); children_add_size += r_size; continue; } @@ -705,8 +705,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, max_order = order; mem64_mask &= r->flags & IORESOURCE_MEM_64; - if (add_head) - children_add_size += get_res_add_size(add_head, r); + if (realloc_head) + children_add_size += get_res_add_size(realloc_head, r); } } align = 0; @@ -725,7 +725,7 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, size0 = calculate_memsize(size, min_size, 0, resource_size(b_res), min_align); if (children_add_size > add_size) add_size = children_add_size; - size1 = (!add_head || (add_head && !add_size)) ? size0 : + size1 = (!realloc_head || (realloc_head && !add_size)) ? size0 : calculate_memsize(size, min_size+add_size, 0, resource_size(b_res), min_align); if (!size0 && !size1) { @@ -739,8 +739,8 @@ static int pbus_size_mem(struct pci_bus *bus, unsigned long mask, b_res->start = min_align; b_res->end = size0 + min_align - 1; b_res->flags |= IORESOURCE_STARTALIGN | mem64_mask; - if (size1 > size0 && add_head) - add_to_list(add_head, bus->self, b_res, size1-size0, min_align); + if (size1 > size0 && realloc_head) + add_to_list(realloc_head, bus->self, b_res, size1-size0, min_align); return 1; } @@ -754,7 +754,7 @@ unsigned long pci_cardbus_resource_alignment(struct resource *res) } static void pci_bus_size_cardbus(struct pci_bus *bus, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *bridge = bus->self; struct resource *b_res = &bridge->resource[PCI_BRIDGE_RESOURCES]; @@ -766,13 +766,13 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, */ b_res[0].start = 0; b_res[0].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res, pci_cardbus_io_size, 0 /* dont care */); b_res[1].start = 0; b_res[1].flags |= IORESOURCE_IO | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+1, pci_cardbus_io_size, 0 /* dont care */); /* * Check whether prefetchable memory is supported @@ -793,18 +793,18 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, if (ctrl & PCI_CB_BRIDGE_CTL_PREFETCH_MEM0) { b_res[2].start = 0; b_res[2].flags |= IORESOURCE_MEM | IORESOURCE_PREFETCH | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+2, pci_cardbus_mem_size, 0 /* dont care */); b_res[3].start = 0; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size, 0 /* dont care */); } else { b_res[3].start = 0; b_res[3].flags |= IORESOURCE_MEM | IORESOURCE_SIZEALIGN; - if (add_head) - add_to_list(add_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); + if (realloc_head) + add_to_list(realloc_head, bridge, b_res+3, pci_cardbus_mem_size * 2, 0 /* dont care */); } /* set the size of the resource to zero, so that the resource does not @@ -816,7 +816,7 @@ static void pci_bus_size_cardbus(struct pci_bus *bus, } void __ref __pci_bus_size_bridges(struct pci_bus *bus, - struct resource_list_x *add_head) + struct resource_list_x *realloc_head) { struct pci_dev *dev; unsigned long mask, prefmask; @@ -829,12 +829,12 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_CARDBUS: - pci_bus_size_cardbus(b, add_head); + pci_bus_size_cardbus(b, realloc_head); break; case PCI_CLASS_BRIDGE_PCI: default: - __pci_bus_size_bridges(b, add_head); + __pci_bus_size_bridges(b, realloc_head); break; } } @@ -858,7 +858,7 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, * Follow thru */ default: - pbus_size_io(bus, 0, additional_io_size, add_head); + pbus_size_io(bus, 0, additional_io_size, realloc_head); /* If the bridge supports prefetchable range, size it separately. If it doesn't, or its prefetchable window has already been allocated by arch code, try @@ -866,11 +866,11 @@ void __ref __pci_bus_size_bridges(struct pci_bus *bus, resources. */ mask = IORESOURCE_MEM; prefmask = IORESOURCE_MEM | IORESOURCE_PREFETCH; - if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, add_head)) + if (pbus_size_mem(bus, prefmask, prefmask, 0, additional_mem_size, realloc_head)) mask = prefmask; /* Success, size non-prefetch only. */ else additional_mem_size += additional_mem_size; - pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, add_head); + pbus_size_mem(bus, mask, IORESOURCE_MEM, 0, additional_mem_size, realloc_head); break; } } @@ -882,20 +882,20 @@ void __ref pci_bus_size_bridges(struct pci_bus *bus) EXPORT_SYMBOL(pci_bus_size_bridges); static void __ref __pci_bus_assign_resources(const struct pci_bus *bus, - struct resource_list_x *add_head, + struct resource_list_x *realloc_head, struct resource_list_x *fail_head) { struct pci_bus *b; struct pci_dev *dev; - pbus_assign_resources_sorted(bus, add_head, fail_head); + pbus_assign_resources_sorted(bus, realloc_head, fail_head); list_for_each_entry(dev, &bus->devices, bus_list) { b = dev->subordinate; if (!b) continue; - __pci_bus_assign_resources(b, add_head, fail_head); + __pci_bus_assign_resources(b, realloc_head, fail_head); switch (dev->class >> 8) { case PCI_CLASS_BRIDGE_PCI: @@ -1105,7 +1105,7 @@ void __init pci_assign_unassigned_resources(void) { struct pci_bus *bus; - struct resource_list_x add_list; /* list of resources that + struct resource_list_x realloc_list; /* list of resources that want additional resources */ int tried_times = 0; enum release_type rel_type = leaf_only; @@ -1118,7 +1118,7 @@ pci_assign_unassigned_resources(void) head.next = NULL; - add_list.next = NULL; + realloc_list.next = NULL; pci_try_num = max_depth + 1; printk(KERN_DEBUG "PCI: max bus depth: %d pci_try_num: %d\n", @@ -1128,12 +1128,12 @@ again: /* Depth first, calculate sizes and alignments of all subordinate buses. */ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_size_bridges(bus, &add_list); + __pci_bus_size_bridges(bus, &realloc_list); /* Depth last, allocate resources and update the hardware. */ list_for_each_entry(bus, &pci_root_buses, node) - __pci_bus_assign_resources(bus, &add_list, &head); - BUG_ON(add_list.next); + __pci_bus_assign_resources(bus, &realloc_list, &head); + BUG_ON(realloc_list.next); tried_times++; /* any device complain? */ -- cgit v1.2.3 From 4a4c879904aa0cc64629e14a49b64fb3d149bf1a Mon Sep 17 00:00:00 2001 From: Dan Bastone Date: Sun, 31 Jul 2011 07:40:49 -0400 Subject: HID: add support for new revision of Apple aluminum keyboard Add USB device ids for the new revision (MB110LL/B) of Apple's wired aluminum keyboard. I have only confirmed that the ANSI version is correct - it is assumed that the ISO and JIS versions follow the standard numbering convention. Signed-off-by: Dan Bastone Signed-off-by: Jiri Kosina --- drivers/hid/hid-apple.c | 6 ++++++ drivers/hid/hid-core.c | 3 +++ drivers/hid/hid-ids.h | 3 +++ 3 files changed, 12 insertions(+) diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index b85744fe8464..18b3bc646bf3 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -444,6 +444,12 @@ static const struct hid_device_id apple_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_GEYSER4_HF_JIS), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN | APPLE_RDESC_JIS }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI), + .driver_data = APPLE_HAS_FN }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO), + .driver_data = APPLE_HAS_FN | APPLE_ISO_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS), + .driver_data = APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ANSI), .driver_data = APPLE_NUMLOCK_EMULATION | APPLE_HAS_FN }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_ISO), diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 1a5cf0c9cfca..242353df3dc4 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1340,6 +1340,9 @@ static const struct hid_device_id hid_have_special_driver[] = { { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_ISO) }, { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_WELLSPRING5_JIS) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ANSI) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_ISO) }, + { HID_USB_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_REVB_JIS) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO) }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_APPLE, USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index db63ccf21cc8..61c880939f56 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -109,6 +109,9 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 +#define USB_DEVICE_ID_APPLE_ALU_REVB_ANSI 0x024f +#define USB_DEVICE_ID_APPLE_ALU_REVB_ISO 0x0250 +#define USB_DEVICE_ID_APPLE_ALU_REVB_JIS 0x0251 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ANSI 0x0239 #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_ISO 0x023a #define USB_DEVICE_ID_APPLE_ALU_WIRELESS_2009_JIS 0x023b -- cgit v1.2.3 From a5395b83b78f62ccf5e3af854aacd025c2a6e7b5 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 2 Aug 2011 09:24:09 +0200 Subject: cfq-iosched: Reduce linked group count upon group destruction FQ keeps track of number of groups which are linked on blkcg->blkg_list. This is useful to avoid races between queue exit and cgroup exit code paths. So if at the request queue exit time linked group count is not zero, that means there are some group out there which is yet to be deleted under rcu read period and queue exit code should wait for on rcu period. In my previous patch I forgot to decrease the number of group count. So in current form, we nr_blkcg_linked_grps is always non-zero and we will always wait one rcu period (if BLK_CGROUP=y). The side effect of this is that it can increase boot time. I am surprised, nobody complained so far. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1f96ad6254f1..650834537606 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1209,6 +1209,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) hlist_del_init(&cfqg->cfqd_node); + BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); + cfqd->nr_blkcg_linked_grps--; + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. -- cgit v1.2.3 From e2a5429ff7947ad251310376384f449297b7492a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 2 Aug 2011 10:43:35 +0200 Subject: bsg-lib: add module.h include Due to conflicts with the moduleh tree in linux-next, we run into an include file mess. We really need export.h in that tree, but if we add module.h locally then the issue is easier to resolve. Reported-by: Stephen Rothwell Signed-off-by: Jens Axboe --- block/bsg-lib.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/bsg-lib.c b/block/bsg-lib.c index f8c0a61a529c..6690e6e41037 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -25,6 +25,7 @@ #include #include #include +#include #include /** -- cgit v1.2.3 From ddad9ef5826efdfbbdb67b13b46f30e43e46ec3e Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Tue, 2 Aug 2011 12:43:49 +0200 Subject: drivers/block/drbd/drbd_nl.c: use bitmap_parse instead of __bitmap_parse The buffer 'sc.cpu_mask' is a kernel buffer. If bitmap_parse is used instead of __bitmap_parse the extra parameter that indicates a kernel buffer is not needed. Signed-off-by: H Hartley Sweeten Cc: Lars Ellenberg Cc: Philipp Reisner Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_nl.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 515bcd948a43..0feab261e295 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -1829,10 +1829,10 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n /* silently ignore cpu mask on UP kernel */ if (nr_cpu_ids > 1 && sc.cpu_mask[0] != 0) { - err = __bitmap_parse(sc.cpu_mask, 32, 0, + err = bitmap_parse(sc.cpu_mask, 32, cpumask_bits(new_cpu_mask), nr_cpu_ids); if (err) { - dev_warn(DEV, "__bitmap_parse() failed with %d\n", err); + dev_warn(DEV, "bitmap_parse() failed with %d\n", err); retcode = ERR_CPU_MASK_PARSE; goto fail; } -- cgit v1.2.3 From aec9f377e4f235c47e27fd8a429555dfa2dda342 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 2 Aug 2011 12:43:50 +0200 Subject: drivers/cdrom/cdrom.c: relax check on dvd manufacturer value The report has an ISO which has a very long manufacturer ID. It seems that Linux is wrong, not the ISO maker. Relax the check for the length of this field: emit a warning and truncate the incoming data to 2048 bytes rather than rejecting the entire thing. dvd_manufact.value isn't null-terminated. I'm not even sure if it's a string. The kernel doesn't apepar to use it anyway. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=39062 Reported-by: Tested-by: Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/cdrom/cdrom.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 75fb965b8f72..f997c27d79e2 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -1929,11 +1929,17 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, goto out; s->manufact.len = buf[0] << 8 | buf[1]; - if (s->manufact.len < 0 || s->manufact.len > 2048) { + if (s->manufact.len < 0) { cdinfo(CD_WARNING, "Received invalid manufacture info length" " (%d)\n", s->manufact.len); ret = -EIO; } else { + if (s->manufact.len > 2048) { + cdinfo(CD_WARNING, "Received invalid manufacture info " + "length (%d): truncating to 2048\n", + s->manufact.len); + s->manufact.len = 2048; + } memcpy(s->manufact.value, &buf[4], s->manufact.len); } -- cgit v1.2.3 From f95fe9cfb49f6e625fbb5888cae2ed6f3a276b89 Mon Sep 17 00:00:00 2001 From: Herbert Poetzl Date: Tue, 2 Aug 2011 12:43:50 +0200 Subject: block/genhd.c: remove useless cast in diskstats_show() Remove the (unsigned long long) cast in diskstats_show() and adjusts the seq_printf() format string to 'unsigned long' diskstats_show() uses part_stat_read() to get the stats, which either accesses the specified field in the struct disk_stats directly (non SMP) or sums up the per CPU values in a variable of the same type as the field, so in any case the result will have the same type and range as the specified field which for all disk_stats entries is unsigned long Also, for unsigned long ranges the output of %lu should be identical to the one of %llu, so no change in the actual proc entry contents. Signed-off-by: Herbert Poetzl Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/genhd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index 5cb51c55f6d8..e2f67902dd02 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = part_stat_lock(); part_round_stats(cpu, hd); part_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu " - "%u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %lu " + "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[READ]), part_stat_read(hd, merges[READ]), - (unsigned long long)part_stat_read(hd, sectors[READ]), + part_stat_read(hd, sectors[READ]), jiffies_to_msecs(part_stat_read(hd, ticks[READ])), part_stat_read(hd, ios[WRITE]), part_stat_read(hd, merges[WRITE]), - (unsigned long long)part_stat_read(hd, sectors[WRITE]), + part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), -- cgit v1.2.3 From debc3b778508f59696ff188f0feca271dcbfa7d9 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 2 Aug 2011 00:01:18 -0500 Subject: PCI: export pcie_bus_configure_settings symbol pcie_bus_configure_settings needs to be exported if the PCI hotplug driver is being compiled as a module. Reported-by: Stephen Rothwell Signed-off-by: Jon Mason Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 5becf7cd50d8..8473727b29fa 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1470,6 +1470,7 @@ void pcie_bus_configure_settings(struct pci_bus *bus, u8 mpss) pcie_bus_configure_set(bus->self, &smpss); pci_walk_bus(bus, pcie_bus_configure_set, &smpss); } +EXPORT_SYMBOL_GPL(pcie_bus_configure_settings); unsigned int __devinit pci_scan_child_bus(struct pci_bus *bus) { -- cgit v1.2.3 From 891f692533c36a17f00d25d24e4ac44ef38c9e5c Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 17:53:54 +0000 Subject: Docs: MSI-HOWTO: Use the subjunctive, and change `can' to `may' Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 3f5e0b09bed5..43ffff1b5618 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -45,7 +45,7 @@ arrived in memory (this becomes more likely with devices behind PCI-PCI bridges). In order to ensure that all the data has arrived in memory, the interrupt handler must read a register on the device which raised the interrupt. PCI transaction ordering rules require that all the data -arrives in memory before the value can be returned from the register. +arrive in memory before the value may be returned from the register. Using MSIs avoids this problem as the interrupt-generating write cannot pass the data writes, so by the time the interrupt is raised, the driver knows that all the data has arrived in memory. -- cgit v1.2.3 From 4979de6efb5553505a595eadc1cf7c386ca1ddc6 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 19:52:56 +0000 Subject: Docs: MSI-HOWTO: Use present tense and streamline some wording Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 44 ++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 43ffff1b5618..13f3a9930ad5 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -86,13 +86,13 @@ device. int pci_enable_msi(struct pci_dev *dev) -A successful call will allocate ONE interrupt to the device, regardless -of how many MSIs the device supports. The device will be switched from +A successful call allocates ONE interrupt to the device, regardless +of how many MSIs the device supports. The device is switched from pin-based interrupt mode to MSI mode. The dev->irq number is changed -to a new number which represents the message signaled interrupt. -This function should be called before the driver calls request_irq() -since enabling MSIs disables the pin-based IRQ and the driver will not -receive interrupts on the old interrupt. +to a new number which represents the message signaled interrupt; +consequently, this function should be called before the driver calls +request_irq(), because an MSI is delivered via a vector that is +different from the vector of a pin-based interrupt. 4.2.2 pci_enable_msi_block @@ -111,10 +111,10 @@ the device are in the range dev->irq to dev->irq + count - 1. If this function returns a negative number, it indicates an error and the driver should not attempt to request any more MSI interrupts for -this device. If this function returns a positive number, it will be -less than 'count' and indicate the number of interrupts that could have -been allocated. In neither case will the irq value have been -updated, nor will the device have been switched into MSI mode. +this device. If this function returns a positive number, it is +less than 'count' and indicates the number of interrupts that could have +been allocated. In neither case is the irq value updated or the device +switched into MSI mode. The device driver must decide what action to take if pci_enable_msi_block() returns a value less than the number asked for. @@ -124,7 +124,7 @@ again. Note that it is not guaranteed to succeed, even when the 'count' has been reduced to the value returned from a previous call to pci_enable_msi_block(). This is because there are multiple constraints on the number of vectors that can be allocated; pci_enable_msi_block() -will return as soon as it finds any constraint that doesn't allow the +returns as soon as it finds any constraint that doesn't allow the call to succeed. 4.2.3 pci_disable_msi @@ -139,8 +139,8 @@ device, so drivers should not cache the value of dev->irq. A device driver must always call free_irq() on the interrupt(s) for which it has called request_irq() before calling this function. -Failure to do so will result in a BUG_ON(), the device will be left with -MSI enabled and will leak its vector. +Failure to do so results in a BUG_ON(), leaving the device with +MSI enabled and thus leaking its vector. 4.3 Using MSI-X @@ -168,10 +168,10 @@ int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) Calling this function asks the PCI subsystem to allocate 'nvec' MSIs. The 'entries' argument is a pointer to an array of msix_entry structs which should be at least 'nvec' entries in size. On success, the -function will return 0 and the device will have been switched into -MSI-X interrupt mode. The 'vector' elements in each entry will have -been filled in with the interrupt number. The driver should then call -request_irq() for each 'vector' that it decides to use. +device is switched into MSI-X mode and the function returns 0. +The 'vector' member in each entry is populated with the interrupt number; +the driver should then call request_irq() for each 'vector' that it +decides to use. If this function returns a negative number, it indicates an error and the driver should not attempt to allocate any more MSI-X interrupts for @@ -219,8 +219,8 @@ the value of the 'vector' elements over a call to pci_disable_msix(). A device driver must always call free_irq() on the interrupt(s) for which it has called request_irq() before calling this function. -Failure to do so will result in a BUG_ON(), the device will be left with -MSI enabled and will leak its vector. +Failure to do so results in a BUG_ON(), leaving the device with +MSI-X enabled and thus leaking its vector. 4.3.3 The MSI-X Table @@ -235,7 +235,7 @@ If a device implements both MSI and MSI-X capabilities, it can run in either MSI mode or MSI-X mode but not both simultaneously. This is a requirement of the PCI spec, and it is enforced by the PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or -pci_enable_msix() when MSI is already enabled will result in an error. +pci_enable_msix() when MSI is already enabled results in an error. If a device driver wishes to switch between MSI and MSI-X at runtime, it must first quiesce the device, then switch it back to pin-interrupt mode, before calling pci_enable_msi() or pci_enable_msix() and resuming @@ -281,7 +281,7 @@ disabled to enabled and back again. Using 'lspci -v' (as root) may show some devices with "MSI", "Message Signalled Interrupts" or "MSI-X" capabilities. Each of these capabilities -has an 'Enable' flag which will be followed with either "+" (enabled) +has an 'Enable' flag which is followed with either "+" (enabled) or "-" (disabled). @@ -298,7 +298,7 @@ The PCI stack provides three ways to disable MSIs: Some host chipsets simply don't support MSIs properly. If we're lucky, the manufacturer knows this and has indicated it in the ACPI -FADT table. In this case, Linux will automatically disable MSIs. +FADT table. In this case, Linux automatically disables MSIs. Some boards don't include this information in the table and so we have to detect them ourselves. The complete list of these is found near the quirk_disable_all_msi() function in drivers/pci/quirks.c. -- cgit v1.2.3 From a2d4d50128279c67d4cf38061206cddc1fc37e75 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 20:03:28 +0000 Subject: Docs: MSI-HOWTO: `asked for' -> `requested' Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 13f3a9930ad5..867ed0351106 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -117,7 +117,7 @@ been allocated. In neither case is the irq value updated or the device switched into MSI mode. The device driver must decide what action to take if -pci_enable_msi_block() returns a value less than the number asked for. +pci_enable_msi_block() returns a value less than the number requested. Some devices can make use of fewer interrupts than the maximum they request; in this case the driver should call pci_enable_msi_block() again. Note that it is not guaranteed to succeed, even when the -- cgit v1.2.3 From 1d15afcc73004028f2870ede7a56d590e1ca8ca8 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 20:05:01 +0000 Subject: Docs: MSI-HOWTO: Streamline some wording Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 867ed0351106..faf37f9d29dc 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -118,8 +118,8 @@ switched into MSI mode. The device driver must decide what action to take if pci_enable_msi_block() returns a value less than the number requested. -Some devices can make use of fewer interrupts than the maximum they -request; in this case the driver should call pci_enable_msi_block() +For instance, the driver could still make use of fewer interrupts; +in this case the driver should call pci_enable_msi_block() again. Note that it is not guaranteed to succeed, even when the 'count' has been reduced to the value returned from a previous call to pci_enable_msi_block(). This is because there are multiple constraints -- cgit v1.2.3 From 263d8d57b3b2e2fbb4e79b7cda7ef3399add4fb7 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 21:28:00 +0000 Subject: Docs: MSI-HOWTO: Put the `because' subordinate clause first Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index faf37f9d29dc..1d7047a34862 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -137,8 +137,8 @@ interrupt number and frees the previously allocated message signaled interrupt(s). The interrupt may subsequently be assigned to another device, so drivers should not cache the value of dev->irq. -A device driver must always call free_irq() on the interrupt(s) -for which it has called request_irq() before calling this function. +Before calling this function, a device driver must always call free_irq() +on any interrupt for which it previously called request_irq(). Failure to do so results in a BUG_ON(), leaving the device with MSI enabled and thus leaking its vector. @@ -217,8 +217,8 @@ the previously allocated message signaled interrupts. The interrupts may subsequently be assigned to another device, so drivers should not cache the value of the 'vector' elements over a call to pci_disable_msix(). -A device driver must always call free_irq() on the interrupt(s) -for which it has called request_irq() before calling this function. +Before calling this function, a device driver must always call free_irq() +on any interrupt for which it previously called request_irq(). Failure to do so results in a BUG_ON(), leaving the device with MSI-X enabled and thus leaking its vector. -- cgit v1.2.3 From e4439236ef5ac8e51ce97d03df8ef3e6dc5c6d51 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 21:30:18 +0000 Subject: Docs: MSI-HOWTO: Offset modifier with a comma, and insert `yet' for emphasis Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 1d7047a34862..515396a3d7e3 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -155,10 +155,10 @@ struct msix_entry { }; This allows for the device to use these interrupts in a sparse fashion; -for example it could use interrupts 3 and 1027 and allocate only a +for example, it could use interrupts 3 and 1027 and yet allocate only a two-element array. The driver is expected to fill in the 'entry' value -in each element of the array to indicate which entries it wants the kernel -to assign interrupts for. It is invalid to fill in two entries with the +in each element of the array to indicate for which entries the kernel +should assign interrupts; it is invalid to fill in two entries with the same number. 4.3.1 pci_enable_msix -- cgit v1.2.3 From ed737c1882c652f0b5a888df59895b5dc2d10cd7 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Mon, 18 Jul 2011 16:15:00 +0000 Subject: Docs: MSI-HOWTO: Insert `that' ... as per Randy Dunlap's wishes :-P Message-Id: <20110717114023.2b4cce91.rdunlap@xenotime.net> Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 515396a3d7e3..c504f12bcc61 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -190,7 +190,7 @@ during the initialization phase. It is ideal if drivers can cope with a variable number of MSI-X interrupts, there are many reasons why the platform may not be able to provide the -exact number a driver asks for. +exact number that a driver asks for. A request loop to achieve that might look like: -- cgit v1.2.3 From 6457d9b350b3f4f2098984eee016c6c994b9c096 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 21:54:18 +0000 Subject: Docs: MSI-HOWTO: Move a sentence to another paragraph Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index c504f12bcc61..28d1ceeea655 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -171,7 +171,8 @@ which should be at least 'nvec' entries in size. On success, the device is switched into MSI-X mode and the function returns 0. The 'vector' member in each entry is populated with the interrupt number; the driver should then call request_irq() for each 'vector' that it -decides to use. +decides to use. The device driver is responsible for keeping track of the +interrupts assigned to the MSI-X vectors so it can free them again later. If this function returns a negative number, it indicates an error and the driver should not attempt to allocate any more MSI-X interrupts for @@ -181,9 +182,7 @@ below. This function, in contrast with pci_enable_msi(), does not adjust dev->irq. The device will not generate interrupts for this interrupt -number once MSI-X is enabled. The device driver is responsible for -keeping track of the interrupts assigned to the MSI-X vectors so it can -free them again later. +number once MSI-X is enabled. Device drivers should normally call this function once per device during the initialization phase. -- cgit v1.2.3 From 5a84fc3162e06632ebea42cefe3b964299213d33 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 21:55:05 +0000 Subject: Docs: MSI-HOWTO: , -> ; Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 28d1ceeea655..f533bc2f283c 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -187,7 +187,7 @@ number once MSI-X is enabled. Device drivers should normally call this function once per device during the initialization phase. -It is ideal if drivers can cope with a variable number of MSI-X interrupts, +It is ideal if drivers can cope with a variable number of MSI-X interrupts; there are many reasons why the platform may not be able to provide the exact number that a driver asks for. -- cgit v1.2.3 From e6ffceb0ded9beeaddd9c246b3fec298c6b1f0c9 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Thu, 14 Jul 2011 23:30:47 +0000 Subject: Docs: MSI-HOWTO: API -> function Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index f533bc2f283c..d9c8d989f2b3 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -211,7 +211,7 @@ static int foo_driver_enable_msix(struct foo_adapter *adapter, int nvec) void pci_disable_msix(struct pci_dev *dev) -This API should be used to undo the effect of pci_enable_msix(). It frees +This function should be used to undo the effect of pci_enable_msix(). It frees the previously allocated message signaled interrupts. The interrupts may subsequently be assigned to another device, so drivers should not cache the value of the 'vector' elements over a call to pci_disable_msix(). -- cgit v1.2.3 From e14bd7e614b57493e1cbefb8a06d3754bdd04e26 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:12:13 +0000 Subject: Docs: MSI-HOWTO: Insert a comma Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index d9c8d989f2b3..c9cffaf16f82 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -231,7 +231,7 @@ mask or unmask an interrupt, it should call disable_irq() / enable_irq(). 4.4 Handling devices implementing both MSI and MSI-X capabilities If a device implements both MSI and MSI-X capabilities, it can -run in either MSI mode or MSI-X mode but not both simultaneously. +run in either MSI mode or MSI-X mode, but not both simultaneously. This is a requirement of the PCI spec, and it is enforced by the PCI layer. Calling pci_enable_msi() when MSI-X is already enabled or pci_enable_msix() when MSI is already enabled results in an error. -- cgit v1.2.3 From 952df55b5a30913f4a5536b12ad09dd95c66d83f Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:15:10 +0000 Subject: Docs: MSI-HOWTO: may -> might Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index c9cffaf16f82..257628fdd464 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -250,7 +250,7 @@ the MSI-X facilities in preference to the MSI facilities. As mentioned above, MSI-X supports any number of interrupts between 1 and 2048. In constrast, MSI is restricted to a maximum of 32 interrupts (and must be a power of two). In addition, the MSI interrupt vectors must -be allocated consecutively, so the system may not be able to allocate +be allocated consecutively, so the system might not be able to allocate as many vectors for MSI as it could for MSI-X. On some platforms, MSI interrupts must all be targeted at the same set of CPUs whereas MSI-X interrupts can all be targeted at different CPUs. -- cgit v1.2.3 From e6b85a1f8a56d3c9db0273b7e4aaab802dc07a9b Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:25:44 +0000 Subject: Docs: MSI-HOWTO: Use `unknown ...' rather than `... know about.' Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 257628fdd464..2322a570beb5 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -316,7 +316,7 @@ Some bridges allow you to enable MSIs by changing some bits in their PCI configuration space (especially the Hypertransport chipsets such as the nVidia nForce and Serverworks HT2000). As with host chipsets, Linux mostly knows about them and automatically enables MSIs if it can. -If you have a bridge which Linux doesn't yet know about, you can enable +If you have a bridge unknown to Linux, you can enable MSIs in configuration space using whatever method you know works, then enable MSIs on that bridge by doing: -- cgit v1.2.3 From 1b8386f61241471c722fbdff48e3d1d97bfca8e6 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:26:37 +0000 Subject: Docs: MSI-HOWTO: can -> could Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 2322a570beb5..3b4727825287 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -326,7 +326,7 @@ where $bridge is the PCI address of the bridge you've enabled (eg 0000:00:0e.0). To disable MSIs, echo 0 instead of 1. Changing this value should be -done with caution as it can break interrupt handling for all devices +done with caution as it could break interrupt handling for all devices below this bridge. Again, please notify linux-pci@vger.kernel.org of any bridges that need -- cgit v1.2.3 From c2b65e181acb9a981c890489c0f9a04d8e1b91f9 Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:27:22 +0000 Subject: Docs: MSI-HOWTO: Insert a comma Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 3b4727825287..67ed5d89524c 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -335,7 +335,7 @@ special handling. 5.3. Disabling MSIs on a single device Some devices are known to have faulty MSI implementations. Usually this -is handled in the individual device driver but occasionally it's necessary +is handled in the individual device driver, but occasionally it's necessary to handle this with a quirk. Some drivers have an option to disable use of MSI. While this is a convenient workaround for the driver author, it is not good practise, and should not be emulated. -- cgit v1.2.3 From 798c794df81e0a1af62c1d7e48b464f4096f3b9a Mon Sep 17 00:00:00 2001 From: Michael Witten Date: Fri, 15 Jul 2011 03:29:04 +0000 Subject: Docs: MSI-HOWTO: MSI -> MSIs Signed-off-by: Michael Witten Acked-by: Matthew Wilcox Signed-off-by: Randy Dunlap --- Documentation/PCI/MSI-HOWTO.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index 67ed5d89524c..53e6fca146d7 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -349,7 +349,7 @@ for your machine. You should also check your .config to be sure you have enabled CONFIG_PCI_MSI. Then, 'lspci -t' gives the list of bridges above a device. Reading -/sys/bus/pci/devices/*/msi_bus will tell you whether MSI are enabled (1) +/sys/bus/pci/devices/*/msi_bus will tell you whether MSIs are enabled (1) or disabled (0). If 0 is found in any of the msi_bus files belonging to bridges between the PCI root and the device, MSIs are disabled. -- cgit v1.2.3 From ad75b88ac3792ae6a541d9b9fa84e379bd0b29dd Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 3 Aug 2011 12:33:20 +0900 Subject: serial: sh-sci: Fix up default regtype probing. Presently the default regtype probing inadvertently bails out due to an inverted error check. This fixes it up, and gets platforms without explicit regtype specifications working again. Reported-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index d0a56235c50e..522f69d3c8ae 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1889,7 +1889,7 @@ static int __devinit sci_init_single(struct platform_device *dev, if (p->regtype == SCIx_PROBE_REGTYPE) { ret = sci_probe_regmap(p); - if (unlikely(!ret)) + if (unlikely(ret != 0)) return ret; } -- cgit v1.2.3 From 5beabc7fcd99856084e232b37d3280ce353eaf41 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Tue, 2 Aug 2011 09:42:54 +0000 Subject: serial: sh-sci: fix DMA build by including dma-mapping.h Include dma-mapping.h to fix build of the sh-sci driver on SH-Mobile ARM (sh73a0) when CONFIG_SERIAL_SH_SCI_DMA=y: drivers/tty/serial/sh-sci.c: In function 'sci_rx_dma_release': drivers/tty/serial/sh-sci.c:1182:3: error: implicit declaration of function 'dma_free_coherent' drivers/tty/serial/sh-sci.c: In function 'work_fn_tx': drivers/tty/serial/sh-sci.c:1333:2: error: implicit declaration of function 'dma_sync_sg_for_device' drivers/tty/serial/sh-sci.c: In function 'sci_request_dma': drivers/tty/serial/sh-sci.c:1498:3: error: implicit declaration of function 'dma_map_sg' drivers/tty/serial/sh-sci.c:1527:3: error: implicit declaration of function 'dma_alloc_coherent' drivers/tty/serial/sh-sci.c:1527:10: warning: assignment makes pointer from integer without a cast make[3]: *** [drivers/tty/serial/sh-sci.o] Error 1 make[2]: *** [drivers/tty/serial] Error 2 make[1]: *** [drivers/tty] Error 2 make: *** [drivers] Error 2 Signed-off-by: Magnus Damm Tested-by: Simon Horman Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 522f69d3c8ae..38a81ae9b7df 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From c84b51e65ea2f256353c339bd87e991b7e64630f Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Sun, 31 Jul 2011 21:36:35 +0000 Subject: sh: Fix conflicting definitions of ptrace_triggered The extra nmi argument is causing this compile fail: CC arch/sh/kernel/ptrace_32.o arch/sh/kernel/ptrace_32.c:66:6: error: conflicting types for 'ptrace_triggered' arch/sh/include/asm/ptrace.h:126:13: note: previous declaration of 'ptrace_triggered' was here make[3]: *** [arch/sh/kernel/ptrace_32.o] Error 1 Signed-off-by: Paul Gortmaker Signed-off-by: Paul Mundt --- arch/sh/include/asm/ptrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h index b97baf81a87b..2d3679b2447f 100644 --- a/arch/sh/include/asm/ptrace.h +++ b/arch/sh/include/asm/ptrace.h @@ -123,7 +123,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, struct perf_event; struct perf_sample_data; -extern void ptrace_triggered(struct perf_event *bp, int nmi, +extern void ptrace_triggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs); #define task_pt_regs(task) \ -- cgit v1.2.3 From 1ba762209491e2496e58baffa3fd65d661f54404 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 3 Aug 2011 03:47:36 +0000 Subject: serial: sh-sci: console Runtime PM support Add Runtime PM context save/restore support to the SCIF driver. Tested on the AP4EVB console. Signed-off-by: Magnus Damm Signed-off-by: Paul Mundt --- drivers/tty/serial/sh-sci.c | 68 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 38a81ae9b7df..ffcaceee0215 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -96,6 +96,12 @@ struct sci_port { #endif struct notifier_block freq_transition; + +#ifdef CONFIG_SERIAL_SH_SCI_CONSOLE + unsigned short saved_smr; + unsigned short saved_fcr; + unsigned char saved_brr; +#endif }; /* Function prototypes */ @@ -1634,11 +1640,25 @@ static unsigned int sci_scbrr_calc(unsigned int algo_id, unsigned int bps, return ((freq + 16 * bps) / (32 * bps) - 1); } +static void sci_reset(struct uart_port *port) +{ + unsigned int status; + + do { + status = sci_in(port, SCxSR); + } while (!(status & SCxSR_TEND(port))); + + sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ + + if (port->type != PORT_SCI) + sci_out(port, SCFCR, SCFCR_RFRST | SCFCR_TFRST); +} + static void sci_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { struct sci_port *s = to_sci_port(port); - unsigned int status, baud, smr_val, max_baud; + unsigned int baud, smr_val, max_baud; int t = -1; u16 scfcr = 0; @@ -1658,14 +1678,7 @@ static void sci_set_termios(struct uart_port *port, struct ktermios *termios, sci_port_enable(s); - do { - status = sci_in(port, SCxSR); - } while (!(status & SCxSR_TEND(port))); - - sci_out(port, SCSCR, 0x00); /* TE=0, RE=0, CKE1=0 */ - - if (port->type != PORT_SCI) - sci_out(port, SCFCR, scfcr | SCFCR_RFRST | SCFCR_TFRST); + sci_reset(port); smr_val = sci_in(port, SCSMR) & 3; @@ -2037,7 +2050,8 @@ static int __devinit serial_console_setup(struct console *co, char *options) if (options) uart_parse_options(options, &baud, &parity, &bits, &flow); - /* TODO: disable clock */ + sci_port_disable(sci_port); + return uart_set_options(port, co, baud, parity, bits, flow); } @@ -2080,6 +2094,36 @@ static int __devinit sci_probe_earlyprintk(struct platform_device *pdev) return 0; } +#define uart_console(port) ((port)->cons->index == (port)->line) + +static int sci_runtime_suspend(struct device *dev) +{ + struct sci_port *sci_port = dev_get_drvdata(dev); + struct uart_port *port = &sci_port->port; + + if (uart_console(port)) { + sci_port->saved_smr = sci_in(port, SCSMR); + sci_port->saved_brr = sci_in(port, SCBRR); + sci_port->saved_fcr = sci_in(port, SCFCR); + } + return 0; +} + +static int sci_runtime_resume(struct device *dev) +{ + struct sci_port *sci_port = dev_get_drvdata(dev); + struct uart_port *port = &sci_port->port; + + if (uart_console(port)) { + sci_reset(port); + sci_out(port, SCSMR, sci_port->saved_smr); + sci_out(port, SCBRR, sci_port->saved_brr); + sci_out(port, SCFCR, sci_port->saved_fcr); + sci_out(port, SCSCR, sci_port->cfg->scscr); + } + return 0; +} + #define SCI_CONSOLE (&serial_console) #else @@ -2089,6 +2133,8 @@ static inline int __devinit sci_probe_earlyprintk(struct platform_device *pdev) } #define SCI_CONSOLE NULL +#define sci_runtime_suspend NULL +#define sci_runtime_resume NULL #endif /* CONFIG_SERIAL_SH_SCI_CONSOLE */ @@ -2204,6 +2250,8 @@ static int sci_resume(struct device *dev) } static const struct dev_pm_ops sci_dev_pm_ops = { + .runtime_suspend = sci_runtime_suspend, + .runtime_resume = sci_runtime_resume, .suspend = sci_suspend, .resume = sci_resume, }; -- cgit v1.2.3 From f41c53a569c4cf0556893ec9cfcf697d069799e1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Aug 2011 15:02:55 +0200 Subject: block: swim3: fix unterminated of_device_id table of_device_id structures need a NULL terminating entry, add it. Signed-off-by: Axel Lin Signed-off-by: Jens Axboe --- drivers/block/swim3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index 773bfa792777..ae3e167e17ad 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -1184,6 +1184,7 @@ static struct of_device_id swim3_match[] = { .compatible = "swim3" }, + { /* end of list */ } }; static struct macio_driver swim3_driver = -- cgit v1.2.3 From 88c9e42196285a7c573e2abda11a4b5037c669bc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Aug 2011 09:57:35 +0200 Subject: nfs: add missing prefetch.h include Fix this compile error on s390: CC [M] fs/nfs/blocklayout/blocklayout.o fs/nfs/blocklayout/blocklayout.c: In function 'bl_end_io_read': fs/nfs/blocklayout/blocklayout.c:201:4: error: implicit declaration of function 'prefetchw' Introduced with 9549ec01 "pnfsblock: bl_read_pagelist". Cc: Fred Isaman Signed-off-by: Heiko Carstens Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index e56564d2ef95..9561c8fc8bdb 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -36,6 +36,7 @@ #include #include /* struct bio */ #include /* various write calls */ +#include #include "blocklayout.h" -- cgit v1.2.3 From 20618b21da0796115e81906d24ff1601552701b7 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 21:54:33 -0700 Subject: pnfs-obj: Bug when we are running out of bio When we have a situation that the number of pages we want to encode is bigger then the size of the bio. (Which can currently happen only when all IO is going to a single device .e.g group_width==1) then the IO is submitted short and we report back only the amount of bytes we actually wrote/read and all is fine. BUT ... There was a bug that the current length counter was advanced before the fail to add the extra page, and we come to a situation that the CDB length was one-page longer then the actual bio size, which is of course rejected by the osd-target. While here also fix the bio size calculation, in the case that we received more then one group of devices. CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 9383ca7245bc..aa8663a8938f 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -589,22 +589,19 @@ static void _calc_stripe_info(struct objio_state *ios, u64 file_offset, } static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, - unsigned pgbase, struct _objio_per_comp *per_dev, int cur_len, + unsigned pgbase, struct _objio_per_comp *per_dev, int len, gfp_t gfp_flags) { unsigned pg = *cur_pg; + int cur_len = len; struct request_queue *q = osd_request_queue(_io_od(ios, per_dev->dev)); - per_dev->length += cur_len; - if (per_dev->bio == NULL) { - unsigned stripes = ios->layout->num_comps / - ios->layout->mirrors_p1; - unsigned pages_in_stripe = stripes * + unsigned pages_in_stripe = ios->layout->group_width * (ios->layout->stripe_unit / PAGE_SIZE); unsigned bio_size = (ios->ol_state.nr_pages + pages_in_stripe) / - stripes; + ios->layout->group_width; if (BIO_MAX_PAGES_KMALLOC < bio_size) bio_size = BIO_MAX_PAGES_KMALLOC; @@ -632,6 +629,7 @@ static int _add_stripe_unit(struct objio_state *ios, unsigned *cur_pg, } BUG_ON(cur_len); + per_dev->length += len; *cur_pg = pg; return 0; } -- cgit v1.2.3 From 9af7db3228acc286c50e3a0f054ec982efdbc6c6 Mon Sep 17 00:00:00 2001 From: Boaz Harrosh Date: Wed, 3 Aug 2011 21:52:51 -0700 Subject: pnfs-obj: Fix the comp_index != 0 case There were bugs in the case of partial layout where olo_comp_index is not zero. This used to work and was tested but one of the later cleanup SQUASHMEs broke it and was not tested since. Also add a dprint that specify those received layout parameters. Everything else was already printed. [Needed in v3.0] CC: Stable Tree Signed-off-by: Boaz Harrosh Signed-off-by: Trond Myklebust --- fs/nfs/objlayout/objio_osd.c | 16 +++++++--------- fs/nfs/objlayout/pnfs_osd_xdr_cli.c | 3 +++ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index aa8663a8938f..d0cda12fddc3 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -479,7 +479,6 @@ static int _io_check(struct objio_state *ios, bool is_write) for (i = 0; i < ios->numdevs; i++) { struct osd_sense_info osi; struct osd_request *or = ios->per_dev[i].or; - unsigned dev; int ret; if (!or) @@ -500,9 +499,8 @@ static int _io_check(struct objio_state *ios, bool is_write) continue; /* we recovered */ } - dev = ios->per_dev[i].dev; - objlayout_io_set_result(&ios->ol_state, dev, - &ios->layout->comps[dev].oc_object_id, + objlayout_io_set_result(&ios->ol_state, i, + &ios->layout->comps[i].oc_object_id, osd_pri_2_pnfs_err(osi.osd_err_pri), ios->per_dev[i].offset, ios->per_dev[i].length, @@ -648,7 +646,7 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, int ret = 0; while (length) { - struct _objio_per_comp *per_dev = &ios->per_dev[dev]; + struct _objio_per_comp *per_dev = &ios->per_dev[dev - first_dev]; unsigned cur_len, page_off = 0; if (!per_dev->length) { @@ -668,8 +666,8 @@ static int _prepare_one_group(struct objio_state *ios, u64 length, cur_len = stripe_unit; } - if (max_comp < dev) - max_comp = dev; + if (max_comp < dev - first_dev) + max_comp = dev - first_dev; } else { cur_len = stripe_unit; } @@ -804,7 +802,7 @@ static int _read_mirrors(struct objio_state *ios, unsigned cur_comp) struct _objio_per_comp *per_dev = &ios->per_dev[cur_comp]; unsigned dev = per_dev->dev; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, @@ -902,7 +900,7 @@ static int _write_mirrors(struct objio_state *ios, unsigned cur_comp) for (; cur_comp < last_comp; ++cur_comp, ++dev) { struct osd_request *or = NULL; struct pnfs_osd_object_cred *cred = - &ios->layout->comps[dev]; + &ios->layout->comps[cur_comp]; struct osd_obj_id obj = { .partition = cred->oc_object_id.oid_partition_id, .id = cred->oc_object_id.oid_object_id, diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c index 16fc758e9123..b3918f7ac34d 100644 --- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c +++ b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c @@ -170,6 +170,9 @@ int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout, p = _osd_xdr_decode_data_map(p, &layout->olo_map); layout->olo_comps_index = be32_to_cpup(p++); layout->olo_num_comps = be32_to_cpup(p++); + dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__, + layout->olo_comps_index, layout->olo_num_comps); + iter->total_comps = layout->olo_num_comps; return 0; } -- cgit v1.2.3 From 55a673990ec04cf63005318bcf08c2b0046e5778 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Aug 2011 14:46:29 -0400 Subject: NFSv4.1: Fix the callback 'highest_used_slotid' behaviour Currently, there is no guarantee that we will call nfs4_cb_take_slot() even though nfs4_callback_compound() will consistently call nfs4_cb_free_slot() provided the cb_process_state has set the 'clp' field. The result is that we can trigger the BUG_ON() upon the next call to nfs4_cb_take_slot(). This patch fixes the above problem by using the slot id that was taken in the CB_SEQUENCE operation as a flag for whether or not we need to call nfs4_cb_free_slot(). It also fixes an atomicity problem: we need to set tbl->highest_used_slotid atomically with the check for NFS4_SESSION_DRAINING, otherwise we end up racing with the various tests in nfs4_begin_drain_session(). Cc: stable@kernel.org [2.6.38+] Signed-off-by: Trond Myklebust --- fs/nfs/callback.h | 2 +- fs/nfs/callback_proc.c | 20 ++++++++++++++------ fs/nfs/callback_xdr.c | 24 +++++++----------------- 3 files changed, 22 insertions(+), 24 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index b257383bb565..07df5f1d85e5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -38,6 +38,7 @@ enum nfs4_callback_opnum { struct cb_process_state { __be32 drc_status; struct nfs_client *clp; + int slotid; }; struct cb_compound_hdr_arg { @@ -166,7 +167,6 @@ extern unsigned nfs4_callback_layoutrecall( void *dummy, struct cb_process_state *cps); extern void nfs4_check_drain_bc_complete(struct nfs4_session *ses); -extern void nfs4_cb_take_slot(struct nfs_client *clp); struct cb_devicenotifyitem { uint32_t cbd_notify_type; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 74780f9f852c..0ab82020f5db 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -348,7 +348,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Normal */ if (likely(args->csa_sequenceid == slot->seq_nr + 1)) { slot->seq_nr++; - return htonl(NFS4_OK); + goto out_ok; } /* Replay */ @@ -367,11 +367,14 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args) /* Wraparound */ if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) { slot->seq_nr = 1; - return htonl(NFS4_OK); + goto out_ok; } /* Misordered request */ return htonl(NFS4ERR_SEQ_MISORDERED); +out_ok: + tbl->highest_used_slotid = args->csa_slotid; + return htonl(NFS4_OK); } /* @@ -433,26 +436,32 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, struct cb_sequenceres *res, struct cb_process_state *cps) { + struct nfs4_slot_table *tbl; struct nfs_client *clp; int i; __be32 status = htonl(NFS4ERR_BADSESSION); - cps->clp = NULL; - clp = nfs4_find_client_sessionid(args->csa_addr, &args->csa_sessionid); if (clp == NULL) goto out; + tbl = &clp->cl_session->bc_slot_table; + + spin_lock(&tbl->slot_tbl_lock); /* state manager is resetting the session */ if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { - status = NFS4ERR_DELAY; + spin_unlock(&tbl->slot_tbl_lock); + status = htonl(NFS4ERR_DELAY); goto out; } status = validate_seqid(&clp->cl_session->bc_slot_table, args); + spin_unlock(&tbl->slot_tbl_lock); if (status) goto out; + cps->slotid = args->csa_slotid; + /* * Check for pending referring calls. If a match is found, a * related callback was received before the response to the original @@ -469,7 +478,6 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, res->csr_slotid = args->csa_slotid; res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1; - nfs4_cb_take_slot(clp); out: cps->clp = clp; /* put in nfs4_callback_compound */ diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index c6c86a77e043..918ad647afea 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -754,26 +754,15 @@ static void nfs4_callback_free_slot(struct nfs4_session *session) * Let the state manager know callback processing done. * A single slot, so highest used slotid is either 0 or -1 */ - tbl->highest_used_slotid--; + tbl->highest_used_slotid = -1; nfs4_check_drain_bc_complete(session); spin_unlock(&tbl->slot_tbl_lock); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { - if (clp && clp->cl_session) - nfs4_callback_free_slot(clp->cl_session); -} - -/* A single slot, so highest used slotid is either 0 or -1 */ -void nfs4_cb_take_slot(struct nfs_client *clp) -{ - struct nfs4_slot_table *tbl = &clp->cl_session->bc_slot_table; - - spin_lock(&tbl->slot_tbl_lock); - tbl->highest_used_slotid++; - BUG_ON(tbl->highest_used_slotid != 0); - spin_unlock(&tbl->slot_tbl_lock); + if (cps->slotid != -1) + nfs4_callback_free_slot(cps->clp->cl_session); } #else /* CONFIG_NFS_V4_1 */ @@ -784,7 +773,7 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op) return htonl(NFS4ERR_MINOR_VERS_MISMATCH); } -static void nfs4_cb_free_slot(struct nfs_client *clp) +static void nfs4_cb_free_slot(struct cb_process_state *cps) { } #endif /* CONFIG_NFS_V4_1 */ @@ -866,6 +855,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_process_state cps = { .drc_status = 0, .clp = NULL, + .slotid = -1, }; unsigned int nops = 0; @@ -906,7 +896,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r *hdr_res.status = status; *hdr_res.nops = htonl(nops); - nfs4_cb_free_slot(cps.clp); + nfs4_cb_free_slot(&cps); nfs_put_client(cps.clp); dprintk("%s: done, status = %u\n", __func__, ntohl(status)); return rpc_success; -- cgit v1.2.3 From 910ac68a2b80c7de95bc8488734067b1bb15d583 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 2 Aug 2011 14:46:52 -0400 Subject: NFSv4.1: Return NFS4ERR_BADSESSION to callbacks during session resets If the client is in the process of resetting the session when it receives a callback, then returning NFS4ERR_DELAY may cause a deadlock with the DESTROY_SESSION call. Basically, if the client returns NFS4ERR_DELAY in response to the CB_SEQUENCE call, then the server is entitled to believe that the client is busy because it is already processing that call. In that case, the server is perfectly entitled to respond with a NFS4ERR_BACK_CHAN_BUSY to any DESTROY_SESSION call. Fix this by having the client reply with a NFS4ERR_BADSESSION in response to the callback if it is resetting the session. Cc: stable@kernel.org [2.6.38+] Signed-off-by: Trond Myklebust --- fs/nfs/callback_proc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 0ab82020f5db..43926add945b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -452,6 +452,11 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args, if (test_bit(NFS4_SESSION_DRAINING, &clp->cl_session->session_state)) { spin_unlock(&tbl->slot_tbl_lock); status = htonl(NFS4ERR_DELAY); + /* Return NFS4ERR_BADSESSION if we're draining the session + * in order to reset it. + */ + if (test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) + status = htonl(NFS4ERR_BADSESSION); goto out; } -- cgit v1.2.3 From 18b08c55a9b04c8783420fb6657599ad724459cc Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Thu, 4 Aug 2011 23:39:58 -0700 Subject: Input: remove CLOCK_TICK_RATE from analog joystick driver The analog joystick driver is written for x86 systems. This patch updates it to use the PIT_TICK_RATE value instead of CLOCK_TICK_RATE as they are equivalent on x86 and we want to deprecate the latter. Signed-off-by: Deepak Saxena Signed-off-by: Dmitry Torokhov --- drivers/input/joystick/analog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/joystick/analog.c b/drivers/input/joystick/analog.c index 4afe0a3b4884..c02131785a36 100644 --- a/drivers/input/joystick/analog.c +++ b/drivers/input/joystick/analog.c @@ -139,7 +139,7 @@ struct analog_port { #include #define GET_TIME(x) do { if (cpu_has_tsc) rdtscl(x); else x = get_time_pit(); } while (0) -#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? CLOCK_TICK_RATE / HZ : 0))) +#define DELTA(x,y) (cpu_has_tsc ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0))) #define TIME_NAME (cpu_has_tsc?"TSC":"PIT") static unsigned int get_time_pit(void) { -- cgit v1.2.3 From 35ae66e0a09ab70ed588e65f26b4c725cd1656b6 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Aug 2011 09:37:10 +0200 Subject: block: Make rq_affinity = 1 work as expected Commit 5757a6d76c introduced a new rq_affinity = 2 so as to make the request completed in the __make_request cpu. But it makes the old rq_affinity = 1 not work any more. The root cause is that if the 'cpu' and 'req->cpu' is in the same group and cpu != req->cpu, ccpu will be the same as group_cpu, so the completion will be excuted in the 'cpu' not 'group_cpu'. This patch fix problem by simpling removing group_cpu and the codes are more explicit now. If ccpu == cpu, we complete in cpu, otherwise we raise_blk_irq to ccpu. Cc: Christoph Hellwig Cc: Roland Dreier Cc: Dan Williams Cc: Jens Axboe Signed-off-by: Tao Ma Reviewed-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-softirq.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 475fab809a80..487addc85bb5 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -103,7 +103,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { - int ccpu, cpu, group_cpu = NR_CPUS; + int ccpu, cpu; struct request_queue *q = req->q; unsigned long flags; @@ -117,14 +117,12 @@ void __blk_complete_request(struct request *req) */ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { ccpu = req->cpu; - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) ccpu = blk_cpu_to_group(ccpu); - group_cpu = blk_cpu_to_group(cpu); - } } else ccpu = cpu; - if (ccpu == cpu || ccpu == group_cpu) { + if (ccpu == cpu) { struct list_head *list; do_local: list = &__get_cpu_var(blk_cpu_done); -- cgit v1.2.3 From 4931402a9dd00b2997e95bfbb89409b2a6dbb383 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 5 Aug 2011 09:42:20 +0200 Subject: cfq-iosched: Add documentation about idling There are always questions about why CFQ is idling on various conditions. Recent ones is Christoph asking again why to idle on REQ_NOIDLE. His assertion is that XFS is relying more and more on workqueues and is concerned that CFQ idling on IO from every workqueue will impact XFS badly. So he suggested that I add some more documentation about CFQ idling and that can provide more clarity on the topic and also gives an opprotunity to poke a hole in theory and lead to improvements. So here is my attempt at that. Any comments are welcome. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- Documentation/block/cfq-iosched.txt | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/Documentation/block/cfq-iosched.txt b/Documentation/block/cfq-iosched.txt index e578feed6d81..6d670f570451 100644 --- a/Documentation/block/cfq-iosched.txt +++ b/Documentation/block/cfq-iosched.txt @@ -43,3 +43,74 @@ If one sets slice_idle=0 and if storage supports NCQ, CFQ internally switches to IOPS mode and starts providing fairness in terms of number of requests dispatched. Note that this mode switching takes effect only for group scheduling. For non-cgroup users nothing should change. + +CFQ IO scheduler Idling Theory +=============================== +Idling on a queue is primarily about waiting for the next request to come +on same queue after completion of a request. In this process CFQ will not +dispatch requests from other cfq queues even if requests are pending there. + +The rationale behind idling is that it can cut down on number of seeks +on rotational media. For example, if a process is doing dependent +sequential reads (next read will come on only after completion of previous +one), then not dispatching request from other queue should help as we +did not move the disk head and kept on dispatching sequential IO from +one queue. + +CFQ has following service trees and various queues are put on these trees. + + sync-idle sync-noidle async + +All cfq queues doing synchronous sequential IO go on to sync-idle tree. +On this tree we idle on each queue individually. + +All synchronous non-sequential queues go on sync-noidle tree. Also any +request which are marked with REQ_NOIDLE go on this service tree. On this +tree we do not idle on individual queues instead idle on the whole group +of queues or the tree. So if there are 4 queues waiting for IO to dispatch +we will idle only once last queue has dispatched the IO and there is +no more IO on this service tree. + +All async writes go on async service tree. There is no idling on async +queues. + +CFQ has some optimizations for SSDs and if it detects a non-rotational +media which can support higher queue depth (multiple requests at in +flight at a time), then it cuts down on idling of individual queues and +all the queues move to sync-noidle tree and only tree idle remains. This +tree idling provides isolation with buffered write queues on async tree. + +FAQ +=== +Q1. Why to idle at all on queues marked with REQ_NOIDLE. + +A1. We only do tree idle (all queues on sync-noidle tree) on queues marked + with REQ_NOIDLE. This helps in providing isolation with all the sync-idle + queues. Otherwise in presence of many sequential readers, other + synchronous IO might not get fair share of disk. + + For example, if there are 10 sequential readers doing IO and they get + 100ms each. If a REQ_NOIDLE request comes in, it will be scheduled + roughly after 1 second. If after completion of REQ_NOIDLE request we + do not idle, and after a couple of milli seconds a another REQ_NOIDLE + request comes in, again it will be scheduled after 1second. Repeat it + and notice how a workload can lose its disk share and suffer due to + multiple sequential readers. + + fsync can generate dependent IO where bunch of data is written in the + context of fsync, and later some journaling data is written. Journaling + data comes in only after fsync has finished its IO (atleast for ext4 + that seemed to be the case). Now if one decides not to idle on fsync + thread due to REQ_NOIDLE, then next journaling write will not get + scheduled for another second. A process doing small fsync, will suffer + badly in presence of multiple sequential readers. + + Hence doing tree idling on threads using REQ_NOIDLE flag on requests + provides isolation from multiple sequential readers and at the same + time we do not idle on individual threads. + +Q2. When to specify REQ_NOIDLE +A2. I would think whenever one is doing synchronous write and not expecting + more writes to be dispatched from same context soon, should be able + to specify REQ_NOIDLE on writes and that probably should work well for + most of the cases. -- cgit v1.2.3 From 2ab1ba68aeaecd41c4b34f0eaf1d70a37367fb1a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 4 Aug 2011 14:28:36 -0400 Subject: Btrfs: force unplugs when switching from high to regular priority bios Btrfs does bio submissions from a worker thread, and each device has a list of high priority bios and regular priority bios. Synchronous writes go to the high priority thread while async writes go to regular list. This commit brings back an explicit unplug any time we switch from high to regular priority, which makes it easier for the block layer to give us low latencies. Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 53875ae73ad4..3c5f2fcd82c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,6 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; + int sync_pending; struct blk_plug plug; /* @@ -229,6 +230,22 @@ loop_lock: BUG_ON(atomic_read(&cur->bi_cnt) == 0); + /* + * if we're doing the sync list, record that our + * plug has some sync requests on it + * + * If we're doing the regular list and there are + * sync requests sitting around, unplug before + * we add more + */ + if (pending_bios == &device->pending_sync_bios) { + sync_pending = 1; + } else if (sync_pending) { + blk_finish_plug(&plug); + blk_start_plug(&plug); + sync_pending = 0; + } + submit_bio(cur->bi_rw, cur); num_run++; batch_run++; -- cgit v1.2.3 From a3ea14df0e383f44dcb2e61badb71180dbffe526 Mon Sep 17 00:00:00 2001 From: Paul Fox Date: Tue, 26 Jul 2011 16:42:26 +0100 Subject: x86, olpc: Wait for last byte of EC command to be accepted When executing EC commands, only waiting when there are still more bytes to write is usually fine. However, if the system suspends very quickly after a call to olpc_ec_cmd(), the last data byte may not yet be transferred to the EC, and the command will not complete. This solves a bug where the SCI wakeup mask was not correctly written when going into suspend. It means that sometimes, on XO-1.5 (but not XO-1), the devices that were marked as wakeup sources can't wake up the system. e.g. you ask for wifi wakeups, suspend, but then incoming wifi frames don't wake up the system as they should. Signed-off-by: Paul Fox Signed-off-by: Daniel Drake Acked-by: Andres Salomon Cc: Signed-off-by: Ingo Molnar --- arch/x86/platform/olpc/olpc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index 8b9940e78e2f..7cce722667b8 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -161,13 +161,13 @@ restart: if (inbuf && inlen) { /* write data to EC */ for (i = 0; i < inlen; i++) { + pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); + outb(inbuf[i], 0x68); if (wait_on_ibf(0x6c, 0)) { printk(KERN_ERR "olpc-ec: timeout waiting for" " EC accept data!\n"); goto err; } - pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]); - outb(inbuf[i], 0x68); } } if (outbuf && outlen) { -- cgit v1.2.3 From 05e33fc20ea5e493a2a1e7f1d04f43cdf89f83ed Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Fri, 5 Aug 2011 09:09:00 -0500 Subject: x86, UV: Remove UV delay in starting slave cpus Delete the 10 msec delay between the INIT and SIPI when starting slave cpus. I can find no requirement for this delay. BIOS also has similar code sequences without the delay. Removing the delay reduces boot time by 40 sec. Every bit helps. Signed-off-by: Jack Steiner Cc: Link: http://lkml.kernel.org/r/20110805140900.GA6774@sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index adc66c3a1fef..34b18594e724 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; uv_write_global_mmr64(pnode, UVH_IPI_INT, val); - mdelay(10); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | -- cgit v1.2.3 From c66d3fcbf306af3c0c4b6f4e0d81467f89c67702 Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 8 Aug 2011 16:30:11 +0900 Subject: sh: Fix up fallout from cpuidle changes. Fixes up the pm_idle redefinition that was introduced with the earlier cpuidle changes. Signed-off-by: Paul Mundt --- arch/sh/kernel/idle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/sh/kernel/idle.c b/arch/sh/kernel/idle.c index 32114e0941ae..db4ecd731a00 100644 --- a/arch/sh/kernel/idle.c +++ b/arch/sh/kernel/idle.c @@ -22,7 +22,7 @@ #include #include -static void (*pm_idle)(void); +void (*pm_idle)(void); static int hlt_counter; -- cgit v1.2.3 From b3623080ff6974e696710b6c6eb4cdbf2bbab347 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 3 Aug 2011 06:08:54 +0000 Subject: mmc: sdhi, mmcif: zboot: Correct clock disable logic This corrects a logic-error that I made in the original implementation. An alternate patch would be to just remove these lines and leave the clock running as it is reconfigured later on during boot anyway. Signed-off-by: Simon Horman Signed-off-by: Paul Mundt --- arch/arm/boot/compressed/mmcif-sh7372.c | 2 +- arch/arm/boot/compressed/sdhi-sh7372.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/compressed/mmcif-sh7372.c b/arch/arm/boot/compressed/mmcif-sh7372.c index b6f61d9a5a1b..672ae95db5c3 100644 --- a/arch/arm/boot/compressed/mmcif-sh7372.c +++ b/arch/arm/boot/compressed/mmcif-sh7372.c @@ -82,7 +82,7 @@ asmlinkage void mmc_loader(unsigned char *buf, unsigned long len) /* Disable clock to MMC hardware block */ - __raw_writel(__raw_readl(SMSTPCR3) & (1 << 12), SMSTPCR3); + __raw_writel(__raw_readl(SMSTPCR3) | (1 << 12), SMSTPCR3); mmc_update_progress(MMC_PROGRESS_DONE); } diff --git a/arch/arm/boot/compressed/sdhi-sh7372.c b/arch/arm/boot/compressed/sdhi-sh7372.c index d403a8b24d7f..d279294f2381 100644 --- a/arch/arm/boot/compressed/sdhi-sh7372.c +++ b/arch/arm/boot/compressed/sdhi-sh7372.c @@ -85,7 +85,7 @@ asmlinkage void mmc_loader(unsigned short *buf, unsigned long len) goto err; /* Disable clock to SDHI1 hardware block */ - __raw_writel(__raw_readl(SMSTPCR3) & (1 << 13), SMSTPCR3); + __raw_writel(__raw_readl(SMSTPCR3) | (1 << 13), SMSTPCR3); mmc_update_progress(MMC_PROGRESS_DONE); -- cgit v1.2.3 From fe43756143c4efa6f8fdef07aa3ca4dc0be24ada Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 17 Jun 2011 08:21:21 +0000 Subject: ARM: mach-shmobile: mackerel: Add USB-DMA ID This patch use channel0 as Tx, and channel1 as Rx Signed-off-by: Kuninori Morimoto Signed-off-by: Paul Mundt --- arch/arm/mach-shmobile/board-mackerel.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d41c01f83f15..e1d9b6af3f94 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -641,6 +641,8 @@ static struct usbhs_private usbhs0_private = { }, .driver_param = { .buswait_bwait = 4, + .d0_tx_id = SHDMA_SLAVE_USB0_TX, + .d1_rx_id = SHDMA_SLAVE_USB0_RX, }, }, }; @@ -810,6 +812,8 @@ static struct usbhs_private usbhs1_private = { .buswait_bwait = 4, .pipe_type = usbhs1_pipe_cfg, .pipe_size = ARRAY_SIZE(usbhs1_pipe_cfg), + .d0_tx_id = SHDMA_SLAVE_USB1_TX, + .d1_rx_id = SHDMA_SLAVE_USB1_RX, }, }, }; -- cgit v1.2.3 From 37fb3a30b46237f23cfdf7ee09d49f9888dd13bf Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 8 Aug 2011 16:08:08 +0200 Subject: fuse: fix flock Commit a9ff4f87 "fuse: support BSD locking semantics" overlooked a number of issues with supporing flock locks over existing POSIX locking infrastructure: - it's not backward compatible, passing flock(2) calls to userspace unconditionally (if userspace sets FUSE_POSIX_LOCKS) - it doesn't cater for the fact that flock locks are automatically unlocked on file release - it doesn't take into account the fact that flock exclusive locks (write locks) don't need an fd opened for write. The last one invalidates the original premise of the patch that flock locks can be emulated with POSIX locks. This patch fixes the first two issues. The last one needs to be fixed in userspace if the filesystem assumed that a write lock will happen only on a file operned for write (as in the case of the current fuse library). Reported-by: Sebastian Pipping Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 11 ++++++++++- fs/fuse/fuse_i.h | 8 +++++++- fs/fuse/inode.c | 8 +++++++- include/linux/fuse.h | 9 ++++++++- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 82a66466a24c..e32784924355 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -245,6 +245,12 @@ void fuse_release_common(struct file *file, int opcode) req = ff->reserved_req; fuse_prepare_release(ff, file->f_flags, opcode); + if (ff->flock) { + struct fuse_release_in *inarg = &req->misc.release.in; + inarg->release_flags |= FUSE_RELEASE_FLOCK_UNLOCK; + inarg->lock_owner = fuse_lock_owner_id(ff->fc, + (fl_owner_t) file); + } /* Hold vfsmount and dentry until release is finished */ path_get(&file->f_path); req->misc.release.path = file->f_path; @@ -1547,11 +1553,14 @@ static int fuse_file_flock(struct file *file, int cmd, struct file_lock *fl) struct fuse_conn *fc = get_fuse_conn(inode); int err; - if (fc->no_lock) { + if (fc->no_flock) { err = flock_lock_file_wait(file, fl); } else { + struct fuse_file *ff = file->private_data; + /* emulate flock with POSIX locks */ fl->fl_owner = (fl_owner_t) file; + ff->flock = true; err = fuse_setlk(file, fl, 1); } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index b788becada76..eb8c6135fbbf 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -135,6 +135,9 @@ struct fuse_file { /** Wait queue head for poll */ wait_queue_head_t poll_wait; + + /** Has flock been performed on this file? */ + bool flock:1; }; /** One input argument of a request */ @@ -448,7 +451,7 @@ struct fuse_conn { /** Is removexattr not implemented by fs? */ unsigned no_removexattr:1; - /** Are file locking primitives not implemented by fs? */ + /** Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; /** Is access not implemented by fs? */ @@ -472,6 +475,9 @@ struct fuse_conn { /** Don't apply umask to creation modes */ unsigned dont_mask:1; + /** Are BSD file locking primitives not implemented by fs? */ + unsigned no_flock:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 5354906e797c..f541d639844b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -809,6 +809,10 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) fc->async_read = 1; if (!(arg->flags & FUSE_POSIX_LOCKS)) fc->no_lock = 1; + if (arg->minor >= 17) { + if (!(arg->flags & FUSE_FLOCK_LOCKS)) + fc->no_flock = 1; + } if (arg->flags & FUSE_ATOMIC_O_TRUNC) fc->atomic_o_trunc = 1; if (arg->minor >= 9) { @@ -823,6 +827,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) } else { ra_pages = fc->max_read / PAGE_CACHE_SIZE; fc->no_lock = 1; + fc->no_flock = 1; } fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages); @@ -843,7 +848,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) arg->minor = FUSE_KERNEL_MINOR_VERSION; arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE; arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC | - FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK; + FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | + FUSE_FLOCK_LOCKS; req->in.h.opcode = FUSE_INIT; req->in.numargs = 1; req->in.args[0].size = sizeof(*arg); diff --git a/include/linux/fuse.h b/include/linux/fuse.h index d464de53db43..464cff526860 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -47,6 +47,9 @@ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct * fuse_ioctl_iovec' instead of ambiguous 'struct iovec' * - add FUSE_IOCTL_32BIT flag + * + * 7.17 + * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK */ #ifndef _LINUX_FUSE_H @@ -78,7 +81,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 16 +#define FUSE_KERNEL_MINOR_VERSION 17 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -153,8 +156,10 @@ struct fuse_file_lock { /** * INIT request/reply flags * + * FUSE_POSIX_LOCKS: remote locking for POSIX file locks * FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".." * FUSE_DONT_MASK: don't apply umask to file mode on create operations + * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks */ #define FUSE_ASYNC_READ (1 << 0) #define FUSE_POSIX_LOCKS (1 << 1) @@ -163,6 +168,7 @@ struct fuse_file_lock { #define FUSE_EXPORT_SUPPORT (1 << 4) #define FUSE_BIG_WRITES (1 << 5) #define FUSE_DONT_MASK (1 << 6) +#define FUSE_FLOCK_LOCKS (1 << 10) /** * CUSE INIT request/reply flags @@ -175,6 +181,7 @@ struct fuse_file_lock { * Release flags */ #define FUSE_RELEASE_FLUSH (1 << 0) +#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1) /** * Getattr flags -- cgit v1.2.3 From b40cdd56dfa065c0832905e266b39f79419e6914 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 25 Jul 2011 22:35:34 +0200 Subject: fuse: delete dead .write_begin and .write_end aops Ever since 'ea9b990 fuse: implement perform_write', the .write_begin and .write_end aops have been dead code. Their task - acquiring a page from the page cache, sending out a write request and releasing the page again - is now done batch-wise to maximize the number of pages send per userspace request. Signed-off-by: Johannes Weiner Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 70 ---------------------------------------------------------- 1 file changed, 70 deletions(-) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e32784924355..ab5b84ef4354 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -749,18 +749,6 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file, return req->misc.write.out.size; } -static int fuse_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata) -{ - pgoff_t index = pos >> PAGE_CACHE_SHIFT; - - *pagep = grab_cache_page_write_begin(mapping, index, flags); - if (!*pagep) - return -ENOMEM; - return 0; -} - void fuse_write_update_size(struct inode *inode, loff_t pos) { struct fuse_conn *fc = get_fuse_conn(inode); @@ -773,62 +761,6 @@ void fuse_write_update_size(struct inode *inode, loff_t pos) spin_unlock(&fc->lock); } -static int fuse_buffered_write(struct file *file, struct inode *inode, - loff_t pos, unsigned count, struct page *page) -{ - int err; - size_t nres; - struct fuse_conn *fc = get_fuse_conn(inode); - unsigned offset = pos & (PAGE_CACHE_SIZE - 1); - struct fuse_req *req; - - if (is_bad_inode(inode)) - return -EIO; - - /* - * Make sure writepages on the same page are not mixed up with - * plain writes. - */ - fuse_wait_on_page_writeback(inode, page->index); - - req = fuse_get_req(fc); - if (IS_ERR(req)) - return PTR_ERR(req); - - req->in.argpages = 1; - req->num_pages = 1; - req->pages[0] = page; - req->page_offset = offset; - nres = fuse_send_write(req, file, pos, count, NULL); - err = req->out.h.error; - fuse_put_request(fc, req); - if (!err && !nres) - err = -EIO; - if (!err) { - pos += nres; - fuse_write_update_size(inode, pos); - if (count == PAGE_CACHE_SIZE) - SetPageUptodate(page); - } - fuse_invalidate_attr(inode); - return err ? err : nres; -} - -static int fuse_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = mapping->host; - int res = 0; - - if (copied) - res = fuse_buffered_write(file, inode, pos, copied, page); - - unlock_page(page); - page_cache_release(page); - return res; -} - static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, struct inode *inode, loff_t pos, size_t count) @@ -2181,8 +2113,6 @@ static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, .writepage = fuse_writepage, .launder_page = fuse_launder_page, - .write_begin = fuse_write_begin, - .write_end = fuse_write_end, .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, -- cgit v1.2.3 From 478e0841b3dce3edc2c67bf0fc51af30f582e9e2 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 25 Jul 2011 22:35:35 +0200 Subject: fuse: mark pages accessed when written to As fuse does not use the page cache library functions when userspace writes to a file, it did not benefit from 'c8236db mm: mark page accessed before we write_end()' that made sure pages are properly marked accessed when written to. Signed-off-by: Johannes Weiner Signed-off-by: Miklos Szeredi --- fs/fuse/file.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/file.c b/fs/fuse/file.c index ab5b84ef4354..7155f49b2ef6 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -14,6 +14,7 @@ #include #include #include +#include static const struct file_operations fuse_direct_io_file_operations; @@ -834,6 +835,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, pagefault_enable(); flush_dcache_page(page); + mark_page_accessed(page); + if (!tmp) { unlock_page(page); page_cache_release(page); -- cgit v1.2.3 From de842eff41017721312d2747bcbee89c1beda6d0 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 6 Aug 2011 10:30:45 -0700 Subject: drm/i915: Wait for LVDS panel power sequence During mode setting, check to make sure the panel power sequencing has completed before doing further operations on the device. This uncovered errors with DPMS not turning the device off as it was left locked. Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_lvds.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 2e8ddfcba40c..63188285d7f9 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -72,14 +72,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds) { struct drm_device *dev = intel_lvds->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 ctl_reg, lvds_reg; + u32 ctl_reg, lvds_reg, stat_reg; if (HAS_PCH_SPLIT(dev)) { ctl_reg = PCH_PP_CONTROL; lvds_reg = PCH_LVDS; + stat_reg = PCH_PP_STATUS; } else { ctl_reg = PP_CONTROL; lvds_reg = LVDS; + stat_reg = PP_STATUS; } I915_WRITE(lvds_reg, I915_READ(lvds_reg) | LVDS_PORT_EN); @@ -94,17 +96,16 @@ static void intel_lvds_enable(struct intel_lvds *intel_lvds) DRM_DEBUG_KMS("applying panel-fitter: %x, %x\n", intel_lvds->pfit_control, intel_lvds->pfit_pgm_ratios); - if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) { - DRM_ERROR("timed out waiting for panel to power off\n"); - } else { - I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios); - I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control); - intel_lvds->pfit_dirty = false; - } + + I915_WRITE(PFIT_PGM_RATIOS, intel_lvds->pfit_pgm_ratios); + I915_WRITE(PFIT_CONTROL, intel_lvds->pfit_control); + intel_lvds->pfit_dirty = false; } I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); POSTING_READ(lvds_reg); + if (wait_for((I915_READ(stat_reg) & PP_ON) != 0, 1000)) + DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(dev); } @@ -113,24 +114,25 @@ static void intel_lvds_disable(struct intel_lvds *intel_lvds) { struct drm_device *dev = intel_lvds->base.base.dev; struct drm_i915_private *dev_priv = dev->dev_private; - u32 ctl_reg, lvds_reg; + u32 ctl_reg, lvds_reg, stat_reg; if (HAS_PCH_SPLIT(dev)) { ctl_reg = PCH_PP_CONTROL; lvds_reg = PCH_LVDS; + stat_reg = PCH_PP_STATUS; } else { ctl_reg = PP_CONTROL; lvds_reg = LVDS; + stat_reg = PP_STATUS; } intel_panel_disable_backlight(dev); I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); + if (wait_for((I915_READ(stat_reg) & PP_ON) == 0, 1000)) + DRM_ERROR("timed out waiting for panel to power off\n"); if (intel_lvds->pfit_control) { - if (wait_for((I915_READ(PP_STATUS) & PP_ON) == 0, 1000)) - DRM_ERROR("timed out waiting for panel to power off\n"); - I915_WRITE(PFIT_CONTROL, 0); intel_lvds->pfit_dirty = true; } -- cgit v1.2.3 From ed10fca9c351c83ab89a97f3515089e0d36bdccc Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 6 Aug 2011 10:33:12 -0700 Subject: drm/i915: Leave LVDS registers unlocked There's no reason to relock them; it just makes operations more complex. This fixes DPMS where the panel registers were locked making the disable not work. Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_lvds.c | 51 ++++++++++++--------------------------- 1 file changed, 16 insertions(+), 35 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 63188285d7f9..8b521a289b29 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -400,53 +400,21 @@ out: static void intel_lvds_prepare(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_lvds *intel_lvds = to_intel_lvds(encoder); - /* We try to do the minimum that is necessary in order to unlock - * the registers for mode setting. - * - * On Ironlake, this is quite simple as we just set the unlock key - * and ignore all subtleties. (This may cause some issues...) - * + /* * Prior to Ironlake, we must disable the pipe if we want to adjust * the panel fitter. However at all other times we can just reset * the registers regardless. */ - - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else if (intel_lvds->pfit_dirty) { - I915_WRITE(PP_CONTROL, - (I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS) - & ~POWER_TARGET_ON); - } else { - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); - } + if (!HAS_PCH_SPLIT(encoder->dev) && intel_lvds->pfit_dirty) + intel_lvds_disable(intel_lvds); } static void intel_lvds_commit(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; - struct drm_i915_private *dev_priv = dev->dev_private; struct intel_lvds *intel_lvds = to_intel_lvds(encoder); - /* Undo any unlocking done in prepare to prevent accidental - * adjustment of the registers. - */ - if (HAS_PCH_SPLIT(dev)) { - u32 val = I915_READ(PCH_PP_CONTROL); - if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS) - I915_WRITE(PCH_PP_CONTROL, val & 0x3); - } else { - u32 val = I915_READ(PP_CONTROL); - if ((val & PANEL_UNLOCK_REGS) == PANEL_UNLOCK_REGS) - I915_WRITE(PP_CONTROL, val & 0x3); - } - /* Always do a full power on as we do not know what state * we were left in. */ @@ -1042,6 +1010,19 @@ out: pwm = I915_READ(BLC_PWM_PCH_CTL1); pwm |= PWM_PCH_ENABLE; I915_WRITE(BLC_PWM_PCH_CTL1, pwm); + /* + * Unlock registers and just + * leave them unlocked + */ + I915_WRITE(PCH_PP_CONTROL, + I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); + } else { + /* + * Unlock registers and just + * leave them unlocked + */ + I915_WRITE(PP_CONTROL, + I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); } dev_priv->lid_notifier.notifier_call = intel_lid_notify; if (acpi_lid_notifier_register(&dev_priv->lid_notifier)) { -- cgit v1.2.3 From 1519b9956eb4b4180fa3f47c73341463cdcfaa37 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 6 Aug 2011 10:35:34 -0700 Subject: drm/i915: Fix PCH port pipe select in CPT disable paths CPT pipe select is different from previous generations (using two bits instead of one). All of the paths from intel_disable_pch_ports were not making this distinction. Mode setting with pipe A turned off would then also force all outputs on pipe B to get turned off as the disable code would mistakenly decide that all of these outputs were on pipe A and turn them off. This is an extension of the CPT DP disable fix (why didn't I fix this then?) Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_reg.h | 13 +++----- drivers/gpu/drm/i915/intel_display.c | 60 ++++++++++++++++++++++++++++++++---- 2 files changed, 58 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d1331f771e2f..5baaef4a0c5d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1318,6 +1318,7 @@ #define ADPA_PIPE_SELECT_MASK (1<<30) #define ADPA_PIPE_A_SELECT 0 #define ADPA_PIPE_B_SELECT (1<<30) +#define ADPA_PIPE_SELECT(pipe) ((pipe) << 30) #define ADPA_USE_VGA_HVPOLARITY (1<<15) #define ADPA_SETS_HVPOLARITY 0 #define ADPA_VSYNC_CNTL_DISABLE (1<<11) @@ -1460,6 +1461,7 @@ /* Selects pipe B for LVDS data. Must be set on pre-965. */ #define LVDS_PIPEB_SELECT (1 << 30) #define LVDS_PIPE_MASK (1 << 30) +#define LVDS_PIPE(pipe) ((pipe) << 30) /* LVDS dithering flag on 965/g4x platform */ #define LVDS_ENABLE_DITHER (1 << 25) /* LVDS sync polarity flags. Set to invert (i.e. negative) */ @@ -1499,9 +1501,6 @@ #define LVDS_B0B3_POWER_DOWN (0 << 2) #define LVDS_B0B3_POWER_UP (3 << 2) -#define LVDS_PIPE_ENABLED(V, P) \ - (((V) & (LVDS_PIPE_MASK | LVDS_PORT_EN)) == ((P) << 30 | LVDS_PORT_EN)) - /* Video Data Island Packet control */ #define VIDEO_DIP_DATA 0x61178 #define VIDEO_DIP_CTL 0x61170 @@ -3256,14 +3255,12 @@ #define ADPA_CRT_HOTPLUG_VOLREF_475MV (1<<17) #define ADPA_CRT_HOTPLUG_FORCE_TRIGGER (1<<16) -#define ADPA_PIPE_ENABLED(V, P) \ - (((V) & (ADPA_TRANS_SELECT_MASK | ADPA_DAC_ENABLE)) == ((P) << 30 | ADPA_DAC_ENABLE)) - /* or SDVOB */ #define HDMIB 0xe1140 #define PORT_ENABLE (1 << 31) #define TRANSCODER_A (0) #define TRANSCODER_B (1 << 30) +#define TRANSCODER(pipe) ((pipe) << 30) #define TRANSCODER_MASK (1 << 30) #define COLOR_FORMAT_8bpc (0) #define COLOR_FORMAT_12bpc (3 << 26) @@ -3280,9 +3277,6 @@ #define HSYNC_ACTIVE_HIGH (1 << 3) #define PORT_DETECTED (1 << 2) -#define HDMI_PIPE_ENABLED(V, P) \ - (((V) & (TRANSCODER_MASK | PORT_ENABLE)) == ((P) << 30 | PORT_ENABLE)) - /* PCH SDVOB multiplex with HDMIB */ #define PCH_SDVOB HDMIB @@ -3349,6 +3343,7 @@ #define PORT_TRANS_B_SEL_CPT (1<<29) #define PORT_TRANS_C_SEL_CPT (2<<29) #define PORT_TRANS_SEL_MASK (3<<29) +#define PORT_TRANS_SEL_CPT(pipe) ((pipe) << 29) #define TRANS_DP_CTL_A 0xe0300 #define TRANS_DP_CTL_B 0xe1300 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 35364e68a091..4c4c903e95ab 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -998,6 +998,53 @@ static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, return true; } +static bool hdmi_pipe_enabled(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 val) +{ + if ((val & PORT_ENABLE) == 0) + return false; + + if (HAS_PCH_CPT(dev_priv->dev)) { + if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) + return false; + } else { + if ((val & TRANSCODER_MASK) != TRANSCODER(pipe)) + return false; + } + return true; +} + +static bool lvds_pipe_enabled(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 val) +{ + if ((val & LVDS_PORT_EN) == 0) + return false; + + if (HAS_PCH_CPT(dev_priv->dev)) { + if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) + return false; + } else { + if ((val & LVDS_PIPE_MASK) != LVDS_PIPE(pipe)) + return false; + } + return true; +} + +static bool adpa_pipe_enabled(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 val) +{ + if ((val & ADPA_DAC_ENABLE) == 0) + return false; + if (HAS_PCH_CPT(dev_priv->dev)) { + if ((val & PORT_TRANS_SEL_MASK) != PORT_TRANS_SEL_CPT(pipe)) + return false; + } else { + if ((val & ADPA_PIPE_SELECT_MASK) != ADPA_PIPE_SELECT(pipe)) + return false; + } + return true; +} + static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 port_sel) { @@ -1011,7 +1058,7 @@ static void assert_pch_hdmi_disabled(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) { u32 val = I915_READ(reg); - WARN(HDMI_PIPE_ENABLED(val, pipe), + WARN(hdmi_pipe_enabled(dev_priv, val, pipe), "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", reg, pipe_name(pipe)); } @@ -1028,13 +1075,13 @@ static void assert_pch_ports_disabled(struct drm_i915_private *dev_priv, reg = PCH_ADPA; val = I915_READ(reg); - WARN(ADPA_PIPE_ENABLED(val, pipe), + WARN(adpa_pipe_enabled(dev_priv, val, pipe), "PCH VGA enabled on transcoder %c, should be disabled\n", pipe_name(pipe)); reg = PCH_LVDS; val = I915_READ(reg); - WARN(LVDS_PIPE_ENABLED(val, pipe), + WARN(lvds_pipe_enabled(dev_priv, val, pipe), "PCH LVDS enabled on transcoder %c, should be disabled\n", pipe_name(pipe)); @@ -1370,7 +1417,7 @@ static void disable_pch_hdmi(struct drm_i915_private *dev_priv, enum pipe pipe, int reg) { u32 val = I915_READ(reg); - if (HDMI_PIPE_ENABLED(val, pipe)) { + if (hdmi_pipe_enabled(dev_priv, val, pipe)) { DRM_DEBUG_KMS("Disabling pch HDMI %x on pipe %d\n", reg, pipe); I915_WRITE(reg, val & ~PORT_ENABLE); @@ -1392,12 +1439,13 @@ static void intel_disable_pch_ports(struct drm_i915_private *dev_priv, reg = PCH_ADPA; val = I915_READ(reg); - if (ADPA_PIPE_ENABLED(val, pipe)) + if (adpa_pipe_enabled(dev_priv, val, pipe)) I915_WRITE(reg, val & ~ADPA_DAC_ENABLE); reg = PCH_LVDS; val = I915_READ(reg); - if (LVDS_PIPE_ENABLED(val, pipe)) { + if (lvds_pipe_enabled(dev_priv, val, pipe)) { + DRM_DEBUG_KMS("disable lvds on pipe %d val 0x%08x\n", pipe, val); I915_WRITE(reg, val & ~LVDS_PORT_EN); POSTING_READ(reg); udelay(100); -- cgit v1.2.3 From 4e6343898fe7eed6b3c0c3c809347bc88d5b4a1e Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 6 Aug 2011 10:39:45 -0700 Subject: drm/i915: Remove unused 'reg' argument to dp_pipe_enabled Just an extra parameter which isn't actually needed. Signed-off-by: Keith Packard Reviewed-by: Jesse Barnes --- drivers/gpu/drm/i915/intel_display.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4c4c903e95ab..f6f18c72068f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -980,8 +980,8 @@ static void assert_transcoder_disabled(struct drm_i915_private *dev_priv, pipe_name(pipe)); } -static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, enum pipe pipe, - int reg, u32 port_sel, u32 val) +static bool dp_pipe_enabled(struct drm_i915_private *dev_priv, + enum pipe pipe, u32 port_sel, u32 val) { if ((val & DP_PORT_EN) == 0) return false; @@ -1049,7 +1049,7 @@ static void assert_pch_dp_disabled(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 port_sel) { u32 val = I915_READ(reg); - WARN(dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val), + WARN(dp_pipe_enabled(dev_priv, pipe, port_sel, val), "PCH DP (0x%08x) enabled on transcoder %c, should be disabled\n", reg, pipe_name(pipe)); } @@ -1407,7 +1407,7 @@ static void disable_pch_dp(struct drm_i915_private *dev_priv, enum pipe pipe, int reg, u32 port_sel) { u32 val = I915_READ(reg); - if (dp_pipe_enabled(dev_priv, pipe, reg, port_sel, val)) { + if (dp_pipe_enabled(dev_priv, pipe, port_sel, val)) { DRM_DEBUG_KMS("Disabling pch dp %x on pipe %d\n", reg, pipe); I915_WRITE(reg, val & ~DP_PORT_EN); } -- cgit v1.2.3 From a88769cde24fcef11219cf99193ee558d1028217 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Jul 2011 10:11:28 -0700 Subject: firmware: fix google/gsmi.c build warning Modify function parameter type to match expected type. Fixes a build warning: drivers/firmware/google/gsmi.c:473: warning: initialization from incompatible pointer type Signed-off-by: Randy Dunlap Cc: Mike Waychison Signed-off-by: Greg Kroah-Hartman --- drivers/firmware/google/gsmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/google/gsmi.c b/drivers/firmware/google/gsmi.c index 68810fd1a59d..aa83de9db1b9 100644 --- a/drivers/firmware/google/gsmi.c +++ b/drivers/firmware/google/gsmi.c @@ -420,7 +420,7 @@ static efi_status_t gsmi_get_next_variable(unsigned long *name_size, static efi_status_t gsmi_set_variable(efi_char16_t *name, efi_guid_t *vendor, - unsigned long attr, + u32 attr, unsigned long data_size, void *data) { -- cgit v1.2.3 From 7de636fa25c19fc4187f85f8325c50b1b21a6d8b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 27 Jul 2011 12:11:25 -0700 Subject: driver core: fix kernel-doc warning in platform.c Warning(drivers/base/platform.c:50): No description found for parameter 'pdev' Warning(drivers/base/platform.c:50): Excess function parameter 'dev' description in 'arch_setup_pdev_archdata' Signed-off-by: Randy Dunlap Cc: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/base/platform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 0cad9c7f6bb5..99a5272d7c2f 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(platform_bus); /** * arch_setup_pdev_archdata - Allow manipulation of archdata before its used - * @dev: platform device + * @pdev: platform device * * This is called before platform_device_add() such that any pdev_archdata may * be setup before the platform_notifier is called. So if a user needs to -- cgit v1.2.3 From f9e0b159dbff693bacb64a929e04f442df985b50 Mon Sep 17 00:00:00 2001 From: Arnaud Lacombe Date: Thu, 21 Jul 2011 13:16:19 -0400 Subject: drivers/base/devtmpfs.c: correct annotation of `setup_done' This fixes the following section mismatch issue: WARNING: vmlinux.o(.text+0x1192bf): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done The function devtmpfsd() references the variable __initdata setup_done. This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong. WARNING: vmlinux.o(.text+0x119342): Section mismatch in reference from the function devtmpfsd() to the variable .init.data:setup_done The function devtmpfsd() references the variable __initdata setup_done. This is often because devtmpfsd lacks a __initdata annotation or the annotation of setup_done is wrong. Signed-off-by: Arnaud Lacombe Signed-off-by: Greg Kroah-Hartman --- drivers/base/devtmpfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 33e1bed68fdd..a4760e095ff5 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -376,7 +376,7 @@ int devtmpfs_mount(const char *mntdir) return err; } -static __initdata DECLARE_COMPLETION(setup_done); +static DECLARE_COMPLETION(setup_done); static int handle(const char *name, mode_t mode, struct device *dev) { -- cgit v1.2.3 From b882fc1b03d46e67ffe06133f4f4532db6e8dd0d Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Thu, 28 Jul 2011 08:50:38 +0900 Subject: serial: samsung: Fix build error drivers/tty/serial/samsung.c: In function 's3c24xx_serial_init': drivers/tty/serial/samsung.c:1237: error: lvalue required as unary '&' operand Cc: Greg Kroah-Hartman Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/samsung.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/samsung.c b/drivers/tty/serial/samsung.c index afc629423152..6edafb5ace18 100644 --- a/drivers/tty/serial/samsung.c +++ b/drivers/tty/serial/samsung.c @@ -1225,15 +1225,19 @@ static const struct dev_pm_ops s3c24xx_serial_pm_ops = { .suspend = s3c24xx_serial_suspend, .resume = s3c24xx_serial_resume, }; +#define SERIAL_SAMSUNG_PM_OPS (&s3c24xx_serial_pm_ops) + #else /* !CONFIG_PM_SLEEP */ -#define s3c24xx_serial_pm_ops NULL + +#define SERIAL_SAMSUNG_PM_OPS NULL #endif /* CONFIG_PM_SLEEP */ int s3c24xx_serial_init(struct platform_driver *drv, struct s3c24xx_uart_info *info) { dbg("s3c24xx_serial_init(%p,%p)\n", drv, info); - drv->driver.pm = &s3c24xx_serial_pm_ops; + + drv->driver.pm = SERIAL_SAMSUNG_PM_OPS; return platform_driver_register(drv); } -- cgit v1.2.3 From a96edd59b2bc88b3d1ea47e0ba48076d65db9302 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 4 Aug 2011 16:44:42 -0600 Subject: ASoC: Tegra: tegra_pcm_deallocate_dma_buffer: Don't OOPS Not all PCM devices have all sub-streams. Specifically, the SPDIF driver only supports playback and hence has no capture substream. Check whether a substream exists before dereferencing it, when de-allocating DMA buffers in tegra_pcm_deallocate_dma_buffer. Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/tegra/tegra_pcm.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra_pcm.c b/sound/soc/tegra/tegra_pcm.c index ff86e5e3db68..c7cfd96e991e 100644 --- a/sound/soc/tegra/tegra_pcm.c +++ b/sound/soc/tegra/tegra_pcm.c @@ -309,9 +309,14 @@ static int tegra_pcm_preallocate_dma_buffer(struct snd_pcm *pcm, int stream) static void tegra_pcm_deallocate_dma_buffer(struct snd_pcm *pcm, int stream) { - struct snd_pcm_substream *substream = pcm->streams[stream].substream; - struct snd_dma_buffer *buf = &substream->dma_buffer; + struct snd_pcm_substream *substream; + struct snd_dma_buffer *buf; + + substream = pcm->streams[stream].substream; + if (!substream) + return; + buf = &substream->dma_buffer; if (!buf->area) return; -- cgit v1.2.3 From 29591ed4ac6fe00e3ff23b5be0cdc7016ef9c47e Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 4 Aug 2011 16:44:43 -0600 Subject: ASoC: Tegra: wm8903 machine driver: Allow re-insertion of module Two issues were preventing module snd-soc-tegra-wm8903.ko from being removed and re-inserted: a) The speaker-enable GPIO is hosted by the WM8903 chip. This GPIO must be freed before snd_soc_unregister_card() is called, because that triggers wm8903.c:wm8903_remove(), which calls gpiochip_remove(), which then fails if any of the GPIOs are in use. To solve this, free all GPIOs first, so the code doesn't care where they come from. b) We need to call snd_soc_jack_free_gpios() to match the call to snd_soc_jack_add_gpios() during initialization. Without this, the call to snd_soc_jack_add_gpios() fails during any subsequent modprobe and initialization, since the GPIO and IRQ are already registered. In turn, this causes the headphone state not to be monitored, so the headphone is assumed not to be plugged in, and the audio path to it is never enabled. Signed-off-by: Stephen Warren Cc: stable@kernel.org Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_wm8903.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c index a42e9ac30f28..661373c2352a 100644 --- a/sound/soc/tegra/tegra_wm8903.c +++ b/sound/soc/tegra/tegra_wm8903.c @@ -56,6 +56,7 @@ #define GPIO_HP_MUTE BIT(1) #define GPIO_INT_MIC_EN BIT(2) #define GPIO_EXT_MIC_EN BIT(3) +#define GPIO_HP_DET BIT(4) struct tegra_wm8903 { struct tegra_asoc_utils_data util_data; @@ -304,6 +305,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) snd_soc_jack_add_gpios(&tegra_wm8903_hp_jack, 1, &tegra_wm8903_hp_jack_gpio); + machine->gpio_requested |= GPIO_HP_DET; } snd_soc_jack_new(codec, "Mic Jack", SND_JACK_MICROPHONE, @@ -429,10 +431,10 @@ static int __devexit tegra_wm8903_driver_remove(struct platform_device *pdev) struct tegra_wm8903 *machine = snd_soc_card_get_drvdata(card); struct tegra_wm8903_platform_data *pdata = machine->pdata; - snd_soc_unregister_card(card); - - tegra_asoc_utils_fini(&machine->util_data); - + if (machine->gpio_requested & GPIO_HP_DET) + snd_soc_jack_free_gpios(&tegra_wm8903_hp_jack, + 1, + &tegra_wm8903_hp_jack_gpio); if (machine->gpio_requested & GPIO_EXT_MIC_EN) gpio_free(pdata->gpio_ext_mic_en); if (machine->gpio_requested & GPIO_INT_MIC_EN) @@ -441,6 +443,11 @@ static int __devexit tegra_wm8903_driver_remove(struct platform_device *pdev) gpio_free(pdata->gpio_hp_mute); if (machine->gpio_requested & GPIO_SPKR_EN) gpio_free(pdata->gpio_spkr_en); + machine->gpio_requested = 0; + + snd_soc_unregister_card(card); + + tegra_asoc_utils_fini(&machine->util_data); kfree(machine); -- cgit v1.2.3 From f99847a6909b95f857ee502ec98c372dcfd90b12 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 4 Aug 2011 16:44:44 -0600 Subject: ASoC: WM8903: Free IRQ on device removal Without this, request_irq on subsequent device initialization fails, and the codec cannot be used. Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/codecs/wm8903.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/codecs/wm8903.c b/sound/soc/codecs/wm8903.c index 43e3d760766f..4ad8ebd290e3 100644 --- a/sound/soc/codecs/wm8903.c +++ b/sound/soc/codecs/wm8903.c @@ -2046,8 +2046,13 @@ static int wm8903_probe(struct snd_soc_codec *codec) /* power down chip */ static int wm8903_remove(struct snd_soc_codec *codec) { + struct wm8903_priv *wm8903 = snd_soc_codec_get_drvdata(codec); + wm8903_free_gpio(codec); wm8903_set_bias_level(codec, SND_SOC_BIAS_OFF); + if (wm8903->irq) + free_irq(wm8903->irq, codec); + return 0; } -- cgit v1.2.3 From cd566c64f50e568c0ac3c13bdd15f523631ce845 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 8 Aug 2011 23:39:59 -0700 Subject: Input: mma8450 - fix module device table type The module device table for of_device_id should use "of" type. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/misc/mma8450.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c index 6c76cf792991..0794778295fc 100644 --- a/drivers/input/misc/mma8450.c +++ b/drivers/input/misc/mma8450.c @@ -234,7 +234,7 @@ static const struct of_device_id mma8450_dt_ids[] = { { .compatible = "fsl,mma8450", }, { /* sentinel */ } }; -MODULE_DEVICE_TABLE(i2c, mma8450_dt_ids); +MODULE_DEVICE_TABLE(of, mma8450_dt_ids); static struct i2c_driver mma8450_driver = { .driver = { -- cgit v1.2.3 From db0b34b07438d92c4c190998c42a502fbf90064e Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 8 Aug 2011 23:45:14 -0700 Subject: Input: bcm5974 - add support for touchpads found in MacBookAir4,2 Added USB device IDs for MacBookAir4,2 trackpad. Device constants were copied from the MacBookAir3,2 constants. The 4,2 device specification is reportedly unchanged from the 3,2 predecessor and seems to work well. Signed-off-by: Joshua V Dillon Signed-off-by: Chase Douglas Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 3126983c004a..48d9ec13d32d 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -67,6 +67,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI 0x0245 #define USB_DEVICE_ID_APPLE_WELLSPRING5_ISO 0x0246 #define USB_DEVICE_ID_APPLE_WELLSPRING5_JIS 0x0247 +/* MacbookAir4,2 (unibody, July 2011) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c +#define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d +#define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -104,6 +108,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5_JIS), + /* MacbookAir4,2 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), /* Terminating entry */ {} }; @@ -294,6 +302,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { { DIM_X, DIM_X / SN_COORD, -4415, 5050 }, { DIM_Y, DIM_Y / SN_COORD, -55, 6680 } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING6_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING6_JIS, + HAS_INTEGRATED_BUTTON, + 0x84, sizeof(struct bt_data), + 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, + { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, + { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } + }, {} }; -- cgit v1.2.3 From 80e0401e35410a69bfae05b454db8a7187edd6b8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Aug 2011 14:26:17 +0200 Subject: lockdep: Fix wrong assumption in match_held_lock match_held_lock() was assuming it was being called on a lock class that had already seen usage. This condition was true for bug-free code using lockdep_assert_held(), since you're in fact holding the lock when calling it. However the assumption fails the moment you assume the assertion can fail, which is the whole point of having the assertion in the first place. Anyway, now that there's more lockdep_is_held() users, notably __rcu_dereference_check(), its much easier to trigger this since we test for a number of locks and we only need to hold any one of them to be good. Reported-by: Sergey Senozhatsky Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1312547787.28695.2.camel@twins Signed-off-by: Ingo Molnar --- kernel/lockdep.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 8c24294e477f..91d67ce3a8d5 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -3111,7 +3111,13 @@ static int match_held_lock(struct held_lock *hlock, struct lockdep_map *lock) if (!class) class = look_up_lock_class(lock, 0); - if (DEBUG_LOCKS_WARN_ON(!class)) + /* + * If look_up_lock_class() failed to find a class, we're trying + * to test if we hold a lock that has never yet been acquired. + * Clearly if the lock hasn't been acquired _ever_, we're not + * holding it either, so report failure. + */ + if (!class) return 0; if (DEBUG_LOCKS_WARN_ON(!hlock->nest_lock)) -- cgit v1.2.3 From ea5e116162b7e0cf83a2b8a273440514404604de Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 3 Aug 2011 11:12:17 -0400 Subject: xen/blkback: Make description more obvious. With the frontend having Xen but the backend not, it just looks odd: <*> Xen virtual block device support <*> Block-device backend driver Fix it to have the 'Xen' in front of it. Reported-by: Sander Eikelenboom Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 717d6e4e18d3..a89ebf1b28aa 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -471,7 +471,7 @@ config XEN_BLKDEV_FRONTEND in another domain which drives the actual block device. config XEN_BLKDEV_BACKEND - tristate "Block-device backend driver" + tristate "Xen block-device backend driver" depends on XEN_BACKEND help The block-device backend driver allows the kernel to export its -- cgit v1.2.3 From 6678050442e90a4e9511a9ed14b9bdfc5e393323 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Aug 2011 17:36:48 +0900 Subject: ASoC: Fix binding of WM8750 on Jive The I2C address is misformatted and would never match. Signed-off-by: Mark Brown Acked-by: Liam Girdwood Cc: stable@kernel.org --- sound/soc/samsung/jive_wm8750.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/samsung/jive_wm8750.c b/sound/soc/samsung/jive_wm8750.c index 3b53ad54bc33..14eb6ea69e7c 100644 --- a/sound/soc/samsung/jive_wm8750.c +++ b/sound/soc/samsung/jive_wm8750.c @@ -131,7 +131,7 @@ static struct snd_soc_dai_link jive_dai = { .cpu_dai_name = "s3c2412-i2s", .codec_dai_name = "wm8750-hifi", .platform_name = "samsung-audio", - .codec_name = "wm8750-codec.0-0x1a", + .codec_name = "wm8750-codec.0-001a", .init = jive_wm8750_init, .ops = &jive_ops, }; -- cgit v1.2.3 From 40045a85df0ec4406fe611967ea9cf9fa668f493 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Aug 2011 18:32:09 +0900 Subject: ASoC: Fix SPI driver binding for WM8987 As we had no id_table only the driver name would be matched against meaning that WM8987 devices wouldn't be bound. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8750.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 38f38fddd190..65fe78aa3757 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -778,11 +778,18 @@ static int __devexit wm8750_spi_remove(struct spi_device *spi) return 0; } +static const struct spi_device_id wm8750_spi_ids[] = { + { "wm8750", 0 }, + { "wm8987", 0 }, +}; +MODULE_DEVICE_TABLE(spi, wm8750_spi_id); + static struct spi_driver wm8750_spi_driver = { .driver = { .name = "wm8750-codec", .owner = THIS_MODULE, }, + .id_table = wm8750_spi_ids, .probe = wm8750_spi_probe, .remove = __devexit_p(wm8750_spi_remove), }; -- cgit v1.2.3 From 371e7305c6c348d9e14a98fe337fadbd4106cfef Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Aug 2011 10:54:17 +0900 Subject: ASoC: Fix warning in Speyside WM8962 Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/samsung/speyside_wm8962.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/samsung/speyside_wm8962.c b/sound/soc/samsung/speyside_wm8962.c index 8ac42bf82090..0b9eb5f7ec4c 100644 --- a/sound/soc/samsung/speyside_wm8962.c +++ b/sound/soc/samsung/speyside_wm8962.c @@ -37,7 +37,7 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card, 44100 * 256, SND_SOC_CLOCK_IN); if (ret < 0) { - pr_err("Failed to set SYSCLK: %d\n"); + pr_err("Failed to set SYSCLK: %d\n", ret); return ret; } } -- cgit v1.2.3 From da64c6fc4aba6f02aa800db72411f459a9f86809 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 9 Aug 2011 09:17:46 -0700 Subject: drm/i915: show interrupt info on IVB IVB uses the same interrupt reg layout as SNB, so add an IS_GEN7 to the interrupt debugfs file. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a8ab6263e0d7..3c395a59da35 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -499,7 +499,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Interrupts received: %d\n", atomic_read(&dev_priv->irq_received)); for (i = 0; i < I915_NUM_RINGS; i++) { - if (IS_GEN6(dev)) { + if (IS_GEN6(dev) || IS_GEN7(dev)) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", dev_priv->ring[i].name, I915_READ_IMR(&dev_priv->ring[i])); -- cgit v1.2.3 From 13d83a672e9bbd52ae82c2f611dfd845a957e8b4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 3 Aug 2011 12:59:20 -0700 Subject: drm/i915: split out PCH refclk update code We ought to be calling this from our DPMS routines as well as global state may change and we need to enable/disable clocks. So split out the code in preparation for further changes. Signed-off-by: Jesse Barnes Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 119 ++++++++++++++++++++++------------- 1 file changed, 76 insertions(+), 43 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index f6f18c72068f..ee1d701317f7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5097,6 +5097,81 @@ static int i9xx_crtc_mode_set(struct drm_crtc *crtc, return ret; } +static void ironlake_update_pch_refclk(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mode_config *mode_config = &dev->mode_config; + struct drm_crtc *crtc; + struct intel_encoder *encoder; + struct intel_encoder *has_edp_encoder = NULL; + u32 temp; + bool has_lvds = false; + + /* We need to take the global config into account */ + list_for_each_entry(crtc, &mode_config->crtc_list, head) { + if (!crtc->enabled) + continue; + + list_for_each_entry(encoder, &mode_config->encoder_list, + base.head) { + if (encoder->base.crtc != crtc) + continue; + + switch (encoder->type) { + case INTEL_OUTPUT_LVDS: + has_lvds = true; + case INTEL_OUTPUT_EDP: + has_edp_encoder = encoder; + break; + } + } + } + + /* Ironlake: try to setup display ref clock before DPLL + * enabling. This is only under driver's control after + * PCH B stepping, previous chipset stepping should be + * ignoring this setting. + */ + temp = I915_READ(PCH_DREF_CONTROL); + /* Always enable nonspread source */ + temp &= ~DREF_NONSPREAD_SOURCE_MASK; + temp |= DREF_NONSPREAD_SOURCE_ENABLE; + temp &= ~DREF_SSC_SOURCE_MASK; + temp |= DREF_SSC_SOURCE_ENABLE; + I915_WRITE(PCH_DREF_CONTROL, temp); + + POSTING_READ(PCH_DREF_CONTROL); + udelay(200); + + if (has_edp_encoder) { + if (intel_panel_use_ssc(dev_priv)) { + temp |= DREF_SSC1_ENABLE; + I915_WRITE(PCH_DREF_CONTROL, temp); + + POSTING_READ(PCH_DREF_CONTROL); + udelay(200); + } + temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; + + /* Enable CPU source on CPU attached eDP */ + if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) { + if (intel_panel_use_ssc(dev_priv)) + temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; + else + temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; + } else { + /* Enable SSC on PCH eDP if needed */ + if (intel_panel_use_ssc(dev_priv)) { + DRM_ERROR("enabling SSC on PCH\n"); + temp |= DREF_SUPERSPREAD_SOURCE_ENABLE; + } + } + I915_WRITE(PCH_DREF_CONTROL, temp); + POSTING_READ(PCH_DREF_CONTROL); + udelay(200); + } +} + static int ironlake_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode, @@ -5292,49 +5367,7 @@ static int ironlake_crtc_mode_set(struct drm_crtc *crtc, ironlake_compute_m_n(intel_crtc->bpp, lane, target_clock, link_bw, &m_n); - /* Ironlake: try to setup display ref clock before DPLL - * enabling. This is only under driver's control after - * PCH B stepping, previous chipset stepping should be - * ignoring this setting. - */ - temp = I915_READ(PCH_DREF_CONTROL); - /* Always enable nonspread source */ - temp &= ~DREF_NONSPREAD_SOURCE_MASK; - temp |= DREF_NONSPREAD_SOURCE_ENABLE; - temp &= ~DREF_SSC_SOURCE_MASK; - temp |= DREF_SSC_SOURCE_ENABLE; - I915_WRITE(PCH_DREF_CONTROL, temp); - - POSTING_READ(PCH_DREF_CONTROL); - udelay(200); - - if (has_edp_encoder) { - if (intel_panel_use_ssc(dev_priv)) { - temp |= DREF_SSC1_ENABLE; - I915_WRITE(PCH_DREF_CONTROL, temp); - - POSTING_READ(PCH_DREF_CONTROL); - udelay(200); - } - temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK; - - /* Enable CPU source on CPU attached eDP */ - if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) { - if (intel_panel_use_ssc(dev_priv)) - temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD; - else - temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD; - } else { - /* Enable SSC on PCH eDP if needed */ - if (intel_panel_use_ssc(dev_priv)) { - DRM_ERROR("enabling SSC on PCH\n"); - temp |= DREF_SUPERSPREAD_SOURCE_ENABLE; - } - } - I915_WRITE(PCH_DREF_CONTROL, temp); - POSTING_READ(PCH_DREF_CONTROL); - udelay(200); - } + ironlake_update_pch_refclk(dev); fp = clock.n << 16 | clock.m1 << 8 | clock.m2; if (has_reduced_clock) -- cgit v1.2.3 From fa1bf42ff9296ac4cf211b0a1b450a6071d26a95 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Tue, 9 Aug 2011 20:32:09 +0200 Subject: allow blk_flush_policy to return REQ_FSEQ_DATA independent of *FLUSH blk_insert_flush has the following check: /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue * for normal execution. */ if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); return; } However, blk_flush_policy will not return with policy set to only REQ_FSEQ_DATA: static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; if (blk_rq_sectors(rq)) policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } return policy; } Notice that REQ_FSEQ_DATA is only set if REQ_FLUSH is set. Fix this mismatch by moving the setting of REQ_FSEQ_DATA outside of the REQ_FLUSH check. Tejun notes: Hmmm... yes, this can become a correctness issue if (and only if) blk_queue_flush() is called to change q->flush_flags while requests are in-flight; otherwise, requests wouldn't reach the function at all. Also, I think it would be a generally good idea to always set FSEQ_DATA if the request has data. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index bb21e4c36f70..2d162bd840d3 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; + if (blk_rq_sectors(rq)) + policy |= REQ_FSEQ_DATA; + if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (blk_rq_sectors(rq)) - policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } -- cgit v1.2.3 From 88ff98775885d72618cbfc5ed6b865593cb66891 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 9 Aug 2011 12:36:00 -0700 Subject: [IA64] fix "allnoconfig" build Link errors: arch/ia64/kernel/built-in.o: In function `arch_setup_dmar_msi': (.text+0x35972): undefined reference to `dmar_msi_write' ... and more ... because allnoconfig has CONFIG_DMAR=y due to the "select DMAR" in arch/ia64/Kconfig under config IA64_GENERIC. Drop that select, but add CONFIG_DMAR=y to generic_defconfig so we keep testbuilding the DMAR code. Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 1 - arch/ia64/configs/generic_defconfig | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 124854714958..3ff7785b3beb 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -162,7 +162,6 @@ config IA64_GENERIC select ACPI_NUMA select SWIOTLB select PCI_MSI - select DMAR help This selects the system type of your hardware. A "generic" kernel will run on any supported IA-64 system. However, if you configure diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 1d7bca0a396d..0e5cd1405e0e 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -234,3 +234,4 @@ CONFIG_CRYPTO_MD5=y # CONFIG_CRYPTO_ANSI_CPRNG is not set CONFIG_CRC_T10DIF=y CONFIG_MISC_DEVICES=y +CONFIG_DMAR=y -- cgit v1.2.3 From 5ac04bf190e6f8b17238aef179ebd7f2bdfec919 Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 3 Aug 2011 16:46:48 +0800 Subject: xHCI: fix port U3 status check condition Fix the port U3 status check when Clear PORT_SUSPEND Feature. The port status should be masked with PORT_PLS_MASK to check if it's in U3 state. This should be backported to kernels as old as 2.6.37. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci-hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 0be788cc2fdb..cddcdccadbf7 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -664,7 +664,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, xhci_dbg(xhci, "PORTSC %04x\n", temp); if (temp & PORT_RESET) goto error; - if (temp & XDEV_U3) { + if ((temp & PORT_PLS_MASK) == XDEV_U3) { if ((temp & PORT_PE) == 0) goto error; -- cgit v1.2.3 From 8a8ff2f9399b23b968901f585ccb5a70a537c5ae Mon Sep 17 00:00:00 2001 From: Andiry Xu Date: Wed, 3 Aug 2011 16:46:49 +0800 Subject: xHCI: report USB2 port in resuming as suspend When a USB2 port initiate a remote wakeup, software shall ensure that resume is signaled for at least 20ms, and then write '0' to the PLS field. According to this, xhci driver do the following things: 1. When receive a remote wakeup event in irq_handler, set the resume_done value as jiffies + 20ms, and modify rh_timer to poll root hub status at that time; 2. When receive a GetPortStatus request, if the jiffies is after the resume_done value, clear the resume signal and resume_done. However, if usb_port_resume() is called before the rh_timer triggered, it will indicate the port as Suspend Cleared and skip the clear resume signal part. The device will fail the usb_get_status request in finish_port_resume(), and usbcore will try a reset-resume instead. Device will work OK after reset-resume, but resume_done value is not cleared in this case, and xhci_bus_suspend() will fail because when it finds a non-zero resume_done value, it will regard the port as resuming and return -EBUSY. This causes issue on some platforms that the system fail to suspend after remote wakeup from suspend by USB2 devices connected to xHCI port. To fix this issue, report the port status as suspend if the resume is signaling less that 20ms, and usb_port_resume() will wait 25ms and check port status again, so xHCI driver can clear the resume signaling and resume_done value. This should be backported to kernels as old as 2.6.37. Signed-off-by: Andiry Xu Signed-off-by: Sarah Sharp Cc: stable@kernel.org --- drivers/usb/host/xhci-hub.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index cddcdccadbf7..1e96d1f1fe6b 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -463,11 +463,12 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, && (temp & PORT_POWER)) status |= USB_PORT_STAT_SUSPEND; } - if ((temp & PORT_PLS_MASK) == XDEV_RESUME) { + if ((temp & PORT_PLS_MASK) == XDEV_RESUME && + !DEV_SUPERSPEED(temp)) { if ((temp & PORT_RESET) || !(temp & PORT_PE)) goto error; - if (!DEV_SUPERSPEED(temp) && time_after_eq(jiffies, - bus_state->resume_done[wIndex])) { + if (time_after_eq(jiffies, + bus_state->resume_done[wIndex])) { xhci_dbg(xhci, "Resume USB2 port %d\n", wIndex + 1); bus_state->resume_done[wIndex] = 0; @@ -487,6 +488,14 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, xhci_ring_device(xhci, slot_id); bus_state->port_c_suspend |= 1 << wIndex; bus_state->suspended_ports &= ~(1 << wIndex); + } else { + /* + * The resume has been signaling for less than + * 20ms. Report the port status as SUSPEND, + * let the usbcore check port status again + * and clear resume signaling later. + */ + status |= USB_PORT_STAT_SUSPEND; } } if ((temp & PORT_PLS_MASK) == XDEV_U0 -- cgit v1.2.3 From d13565c12828ce0cd2a3862bf6260164a0653352 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 22 Jul 2011 14:34:34 -0700 Subject: xhci: Fix memory leak during failed enqueue. When the isochronous transfer support was introduced, and the xHCI driver switched to using urb->hcpriv to store an "urb_priv" pointer, a couple of memory leaks were introduced into the URB enqueue function in its error handling paths. xhci_urb_enqueue allocates urb_priv, but it doesn't free it if changing the control endpoint's max packet size fails or the bulk endpoint is in the middle of allocating or deallocating streams. xhci_urb_enqueue also doesn't free urb_priv if any of the four endpoint types' enqueue functions fail. Instead, it expects those functions to free urb_priv if an error occurs. However, the bulk, control, and interrupt enqueue functions do not free urb_priv if the endpoint ring is NULL. It will, however, get freed if prepare_transfer() fails in those enqueue functions. Several of the error paths in the isochronous endpoint enqueue function also fail to free it. xhci_queue_isoc_tx_prepare() doesn't free urb_priv if prepare_ring() indicates there is not enough room for all the isochronous TDs in this URB. If individual isochronous TDs fail to be queued (perhaps due to an endpoint state change), urb_priv is also leaked. This argues that the freeing of urb_priv should be done in the function that allocated it, xhci_urb_enqueue. This patch looks rather ugly, but refactoring the code will have to wait because this patch needs to be backported to stable kernels. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 5 +---- drivers/usb/host/xhci.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 7113d16e2d3a..9d3f9dd1ad28 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2500,11 +2500,8 @@ static int prepare_transfer(struct xhci_hcd *xhci, if (td_index == 0) { ret = usb_hcd_link_urb_to_ep(bus_to_hcd(urb->dev->bus), urb); - if (unlikely(ret)) { - xhci_urb_free_priv(xhci, urb_priv); - urb->hcpriv = NULL; + if (unlikely(ret)) return ret; - } } td->urb = urb; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 1c4432d8fc10..8e84acff1134 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1085,8 +1085,11 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) if (urb->dev->speed == USB_SPEED_FULL) { ret = xhci_check_maxpacket(xhci, slot_id, ep_index, urb); - if (ret < 0) + if (ret < 0) { + xhci_urb_free_priv(xhci, urb_priv); + urb->hcpriv = NULL; return ret; + } } /* We have a spinlock and interrupts disabled, so we must pass @@ -1097,6 +1100,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_ctrl_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_bulk(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); @@ -1117,6 +1122,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) ret = xhci_queue_bulk_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); } + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else if (usb_endpoint_xfer_int(&urb->ep->desc)) { spin_lock_irqsave(&xhci->lock, flags); @@ -1124,6 +1131,8 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_intr_tx(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } else { spin_lock_irqsave(&xhci->lock, flags); @@ -1131,18 +1140,22 @@ int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flags) goto dying; ret = xhci_queue_isoc_tx_prepare(xhci, GFP_ATOMIC, urb, slot_id, ep_index); + if (ret) + goto free_priv; spin_unlock_irqrestore(&xhci->lock, flags); } exit: return ret; dying: - xhci_urb_free_priv(xhci, urb_priv); - urb->hcpriv = NULL; xhci_dbg(xhci, "Ep 0x%x: URB %p submitted for " "non-responsive xHCI host.\n", urb->ep->desc.bEndpointAddress, urb); + ret = -ESHUTDOWN; +free_priv: + xhci_urb_free_priv(xhci, urb_priv); + urb->hcpriv = NULL; spin_unlock_irqrestore(&xhci->lock, flags); - return -ESHUTDOWN; + return ret; } /* Get the right ring for the given URB. -- cgit v1.2.3 From 522989a27c7badb608155b1f1dea3487ed431f74 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 29 Jul 2011 12:44:32 -0700 Subject: xhci: Fix failed enqueue in the middle of isoch TD. When an isochronous transfer is enqueued, xhci_queue_isoc_tx_prepare() will ensure that there is enough room on the transfer rings for all of the isochronous TDs for that URB. However, when xhci_queue_isoc_tx() is enqueueing individual isoc TDs, the prepare_transfer() function can fail if the endpoint state has changed to disabled, error, or some other unknown state. With the current code, if Nth TD (not the first TD) fails, the ring is left in a sorry state. The partially enqueued TDs are left on the ring, and the first TRB of the TD is not given back to the hardware. The enqueue pointer is left on the TRB after the last successfully enqueued TD. This means the ring is basically useless. Any new transfers will be enqueued after the failed TDs, which the hardware will never read because the cycle bit indicates it does not own them. The ring will fill up with untransferred TDs, and the endpoint will be basically unusable. The untransferred TDs will also remain on the TD list. Since the td_list is a FIFO, this basically means the ring handler will be waiting on TDs that will never be completed (or worse, dereference memory that doesn't exist any more). Change the code to clean up the isochronous ring after a failed transfer. If the first TD failed, simply return and allow the xhci_urb_enqueue function to free the urb_priv. If the Nth TD failed, first remove the TDs from the td_list. Then convert the TRBs that were enqueued into No-op TRBs. Make sure to flip the cycle bit on all enqueued TRBs (including any link TRBs in the middle or between TDs), but leave the cycle bit of the first TRB (which will show software-owned) intact. Then move the ring enqueue pointer back to the first TRB and make sure to change the xhci_ring's cycle state to what is appropriate for that ring segment. This ensures that the No-op TRBs will be overwritten by subsequent TDs, and the hardware will not start executing random TRBs because the cycle bit was left as hardware-owned. This bug is unlikely to be hit, but it was something I noticed while tracking down the watchdog timer issue. I verified that the fix works by injecting some errors on the 250th isochronous URB queued, although I could not verify that the ring is in the correct state because uvcvideo refused to talk to the device after the first usb_submit_urb() failed. Ring debugging shows that the ring looks correct, however. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 50 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 9d3f9dd1ad28..f72149b666b1 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -514,8 +514,12 @@ void xhci_find_new_dequeue_state(struct xhci_hcd *xhci, (unsigned long long) addr); } +/* flip_cycle means flip the cycle bit of all but the first and last TRB. + * (The last TRB actually points to the ring enqueue pointer, which is not part + * of this TD.) This is used to remove partially enqueued isoc TDs from a ring. + */ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, - struct xhci_td *cur_td) + struct xhci_td *cur_td, bool flip_cycle) { struct xhci_segment *cur_seg; union xhci_trb *cur_trb; @@ -528,6 +532,12 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, * leave the pointers intact. */ cur_trb->generic.field[3] &= cpu_to_le32(~TRB_CHAIN); + /* Flip the cycle bit (link TRBs can't be the first + * or last TRB). + */ + if (flip_cycle) + cur_trb->generic.field[3] ^= + cpu_to_le32(TRB_CYCLE); xhci_dbg(xhci, "Cancel (unchain) link TRB\n"); xhci_dbg(xhci, "Address = %p (0x%llx dma); " "in seg %p (0x%llx dma)\n", @@ -541,6 +551,11 @@ static void td_to_noop(struct xhci_hcd *xhci, struct xhci_ring *ep_ring, cur_trb->generic.field[2] = 0; /* Preserve only the cycle bit of this TRB */ cur_trb->generic.field[3] &= cpu_to_le32(TRB_CYCLE); + /* Flip the cycle bit except on the first or last TRB */ + if (flip_cycle && cur_trb != cur_td->first_trb && + cur_trb != cur_td->last_trb) + cur_trb->generic.field[3] ^= + cpu_to_le32(TRB_CYCLE); cur_trb->generic.field[3] |= cpu_to_le32( TRB_TYPE(TRB_TR_NOOP)); xhci_dbg(xhci, "Cancel TRB %p (0x%llx dma) " @@ -719,7 +734,7 @@ static void handle_stopped_endpoint(struct xhci_hcd *xhci, cur_td->urb->stream_id, cur_td, &deq_state); else - td_to_noop(xhci, ep_ring, cur_td); + td_to_noop(xhci, ep_ring, cur_td, false); remove_finished_td: /* * The event handler won't see a completion for this TD anymore, @@ -3223,6 +3238,7 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, start_trb = &ep_ring->enqueue->generic; start_cycle = ep_ring->cycle_state; + urb_priv = urb->hcpriv; /* Queue the first TRB, even if it's zero-length */ for (i = 0; i < num_tds; i++) { unsigned int total_packet_count; @@ -3246,12 +3262,13 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, ret = prepare_transfer(xhci, xhci->devs[slot_id], ep_index, urb->stream_id, trbs_per_td, urb, i, mem_flags); - if (ret < 0) - return ret; + if (ret < 0) { + if (i == 0) + return ret; + goto cleanup; + } - urb_priv = urb->hcpriv; td = urb_priv->td[i]; - for (j = 0; j < trbs_per_td; j++) { u32 remainder = 0; field = TRB_TBC(burst_count) | TRB_TLBPC(residue); @@ -3341,6 +3358,27 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, giveback_first_trb(xhci, slot_id, ep_index, urb->stream_id, start_cycle, start_trb); return 0; +cleanup: + /* Clean up a partially enqueued isoc transfer. */ + + for (i--; i >= 0; i--) + list_del(&urb_priv->td[i]->td_list); + + /* Use the first TD as a temporary variable to turn the TDs we've queued + * into No-ops with a software-owned cycle bit. That way the hardware + * won't accidentally start executing bogus TDs when we partially + * overwrite them. td->first_trb and td->start_seg are already set. + */ + urb_priv->td[0]->last_trb = ep_ring->enqueue; + /* Every TRB except the first & last will have its cycle bit flipped. */ + td_to_noop(xhci, ep_ring, urb_priv->td[0], true); + + /* Reset the ring enqueue back to the first TRB and its cycle bit. */ + ep_ring->enqueue = urb_priv->td[0]->first_trb; + ep_ring->enq_seg = urb_priv->td[0]->start_seg; + ep_ring->cycle_state = start_cycle; + usb_hcd_unlink_urb_from_ep(bus_to_hcd(urb->dev->bus), urb); + return ret; } /* -- cgit v1.2.3 From 585df1d90cb07a02ca6c7a7d339e56e46d50dafb Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 2 Aug 2011 15:43:40 -0700 Subject: xhci: Remove TDs from TD lists when URBs are canceled. When a driver tries to cancel an URB, and the host controller is dying, xhci_urb_dequeue will giveback the URB without removing the xhci_tds that comprise that URB from the td_list or the cancelled_td_list. This can cause a race condition between the driver calling URB dequeue and the stop endpoint command watchdog timer. If the timer fires on a dying host, and a driver attempts to resubmit while the watchdog timer has dropped the xhci->lock to giveback a cancelled URB, URBs may be given back by the xhci_urb_dequeue() function. At that point, the URB's priv pointer will be freed and set to NULL, but the TDs will remain on the td_list. This will cause an oops in xhci_giveback_urb_in_irq() when the watchdog timer attempts to loop through the endpoints' td_lists, giving back killed URBs. Make sure that xhci_urb_dequeue() removes TDs from the TD lists and canceled TD lists before it gives back the URB. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Cc: Andiry Xu Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 16 ++++++++-------- drivers/usb/host/xhci.c | 7 +++++++ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index f72149b666b1..b2d654b7477e 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -741,7 +741,7 @@ remove_finished_td: * so remove it from the endpoint ring's TD list. Keep it in * the cancelled TD list for URB completion later. */ - list_del(&cur_td->td_list); + list_del_init(&cur_td->td_list); } last_unlinked_td = cur_td; xhci_stop_watchdog_timer_in_irq(xhci, ep); @@ -769,7 +769,7 @@ remove_finished_td: do { cur_td = list_entry(ep->cancelled_td_list.next, struct xhci_td, cancelled_td_list); - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); /* Clean up the cancelled URB */ /* Doesn't matter what we pass for status, since the core will @@ -877,9 +877,9 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) cur_td = list_first_entry(&ring->td_list, struct xhci_td, td_list); - list_del(&cur_td->td_list); + list_del_init(&cur_td->td_list); if (!list_empty(&cur_td->cancelled_td_list)) - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN, "killed"); } @@ -888,7 +888,7 @@ void xhci_stop_endpoint_command_watchdog(unsigned long arg) &temp_ep->cancelled_td_list, struct xhci_td, cancelled_td_list); - list_del(&cur_td->cancelled_td_list); + list_del_init(&cur_td->cancelled_td_list); xhci_giveback_urb_in_irq(xhci, cur_td, -ESHUTDOWN, "killed"); } @@ -1580,10 +1580,10 @@ td_cleanup: else *status = 0; } - list_del(&td->td_list); + list_del_init(&td->td_list); /* Was this TD slated to be cancelled but completed anyway? */ if (!list_empty(&td->cancelled_td_list)) - list_del(&td->cancelled_td_list); + list_del_init(&td->cancelled_td_list); urb_priv->td_cnt++; /* Giveback the urb when all the tds are completed */ @@ -3362,7 +3362,7 @@ cleanup: /* Clean up a partially enqueued isoc transfer. */ for (i--; i >= 0; i--) - list_del(&urb_priv->td[i]->td_list); + list_del_init(&urb_priv->td[i]->td_list); /* Use the first TD as a temporary variable to turn the TDs we've queued * into No-ops with a software-owned cycle bit. That way the hardware diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8e84acff1134..3a0f695138f4 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -1252,6 +1252,13 @@ int xhci_urb_dequeue(struct usb_hcd *hcd, struct urb *urb, int status) if (temp == 0xffffffff || (xhci->xhc_state & XHCI_STATE_HALTED)) { xhci_dbg(xhci, "HW died, freeing TD.\n"); urb_priv = urb->hcpriv; + for (i = urb_priv->td_cnt; i < urb_priv->length; i++) { + td = urb_priv->td[i]; + if (!list_empty(&td->td_list)) + list_del_init(&td->td_list); + if (!list_empty(&td->cancelled_td_list)) + list_del_init(&td->cancelled_td_list); + } usb_hcd_unlink_urb_from_ep(hcd, urb); spin_unlock_irqrestore(&xhci->lock, flags); -- cgit v1.2.3 From 5185352c163a72cf969b2fbbfb89801b398896fd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 9 Aug 2011 14:48:11 -0700 Subject: libceph: fix msgpool There were several problems here: 1- we weren't tagging allocations with the pool, so they were never returned to the pool. 2- msgpool_put didn't add back to the mempool, even it were called. 3- msgpool_release didn't clear the pool pointer, so it would have looped had #1 not been broken. These may or may not have been responsible for #1136 or #1381 (BUG due to non-empty mempool on umount). I can't seem to trigger the crash now using the method I was using before. Signed-off-by: Sage Weil --- net/ceph/msgpool.c | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index d5f2d97ac05c..1f4cb30a42c5 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -7,27 +7,37 @@ #include -static void *alloc_fn(gfp_t gfp_mask, void *arg) +static void *msgpool_alloc(gfp_t gfp_mask, void *arg) { struct ceph_msgpool *pool = arg; - void *p; + struct ceph_msg *msg; - p = ceph_msg_new(0, pool->front_len, gfp_mask); - if (!p) - pr_err("msgpool %s alloc failed\n", pool->name); - return p; + msg = ceph_msg_new(0, pool->front_len, gfp_mask); + if (!msg) { + dout("msgpool_alloc %s failed\n", pool->name); + } else { + dout("msgpool_alloc %s %p\n", pool->name, msg); + msg->pool = pool; + } + return msg; } -static void free_fn(void *element, void *arg) +static void msgpool_free(void *element, void *arg) { - ceph_msg_put(element); + struct ceph_msgpool *pool = arg; + struct ceph_msg *msg = element; + + dout("msgpool_release %s %p\n", pool->name, msg); + msg->pool = NULL; + ceph_msg_put(msg); } int ceph_msgpool_init(struct ceph_msgpool *pool, int front_len, int size, bool blocking, const char *name) { + dout("msgpool %s init\n", name); pool->front_len = front_len; - pool->pool = mempool_create(size, alloc_fn, free_fn, pool); + pool->pool = mempool_create(size, msgpool_alloc, msgpool_free, pool); if (!pool->pool) return -ENOMEM; pool->name = name; @@ -36,14 +46,17 @@ int ceph_msgpool_init(struct ceph_msgpool *pool, void ceph_msgpool_destroy(struct ceph_msgpool *pool) { + dout("msgpool %s destroy\n", pool->name); mempool_destroy(pool->pool); } struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, int front_len) { + struct ceph_msg *msg; + if (front_len > pool->front_len) { - pr_err("msgpool_get pool %s need front %d, pool size is %d\n", + dout("msgpool_get %s need front %d, pool size is %d\n", pool->name, front_len, pool->front_len); WARN_ON(1); @@ -51,14 +64,19 @@ struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, return ceph_msg_new(0, front_len, GFP_NOFS); } - return mempool_alloc(pool->pool, GFP_NOFS); + msg = mempool_alloc(pool->pool, GFP_NOFS); + dout("msgpool_get %s %p\n", pool->name, msg); + return msg; } void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg) { + dout("msgpool_put %s %p\n", pool->name, msg); + /* reset msg front_len; user may have changed it */ msg->front.iov_len = pool->front_len; msg->hdr.front_len = cpu_to_le32(pool->front_len); kref_init(&msg->kref); /* retake single ref */ + mempool_free(msg, pool->pool); } -- cgit v1.2.3 From 511d8cf0ab3d2e4ec3f3f672b06a83f17874b83b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 10 Aug 2011 09:41:26 +0900 Subject: ASoC: Fix typo in wm8750 spi_ids Signed-off-by: Mark Brown Reported-by: Stephen Rothwell --- sound/soc/codecs/wm8750.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 65fe78aa3757..e6f47f49357d 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -782,7 +782,7 @@ static const struct spi_device_id wm8750_spi_ids[] = { { "wm8750", 0 }, { "wm8987", 0 }, }; -MODULE_DEVICE_TABLE(spi, wm8750_spi_id); +MODULE_DEVICE_TABLE(spi, wm8750_spi_ids); static struct spi_driver wm8750_spi_driver = { .driver = { -- cgit v1.2.3 From c9c9e4e4252c9d554222906e4a843efd27c0ac96 Mon Sep 17 00:00:00 2001 From: Kazutomo Yoshii Date: Tue, 9 Aug 2011 23:39:13 -0500 Subject: ALSA: usb-audio - Add quirk for BOSS Micro BR-80 Signed-off-by: Kazutomo Yoshii Signed-off-by: Takashi Iwai --- sound/usb/quirks-table.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 4d4f86552a23..a42e3ef3832d 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -1707,6 +1707,40 @@ YAMAHA_DEVICE(0x7010, "UB99"), } } }, +{ + USB_DEVICE(0x0582, 0x0130), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + /* .vendor_name = "BOSS", */ + /* .product_name = "MICRO BR-80", */ + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_IGNORE_INTERFACE + }, + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = 3, + .type = QUIRK_MIDI_FIXED_ENDPOINT, + .data = & (const struct snd_usb_midi_endpoint_info) { + .out_cables = 0x0001, + .in_cables = 0x0001 + } + }, + { + .ifnum = -1 + } + } + } +}, /* Guillemot devices */ { -- cgit v1.2.3 From 96b635977984a88ecdb9cc76b8a54db7297f36e0 Mon Sep 17 00:00:00 2001 From: Wang Shaoyan Date: Wed, 10 Aug 2011 16:01:04 +0800 Subject: ALSA: hda - Add CONFIG_SND_HDA_POWER_SAVE to stac_vrefout_set() In commit 45eebda7, it add new function stac_vrefout_set, but it is only used in code between CONFIG_SND_HDA_POWER_SAVE macro, so add the macro to avoid such warning: sound/pci/hda/patch_sigmatel.c:676:12: warning: 'stac_vrefout_set' defined but not used Signed-off-by: Wang Shaoyan Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index aa376b59c006..5145b663ef6e 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -673,6 +673,7 @@ static int stac92xx_smux_enum_put(struct snd_kcontrol *kcontrol, return 0; } +#ifdef CONFIG_SND_HDA_POWER_SAVE static int stac_vrefout_set(struct hda_codec *codec, hda_nid_t nid, unsigned int new_vref) { @@ -696,6 +697,7 @@ static int stac_vrefout_set(struct hda_codec *codec, return 1; } +#endif static unsigned int stac92xx_vref_set(struct hda_codec *codec, hda_nid_t nid, unsigned int new_vref) -- cgit v1.2.3 From a5a3973da8b52944bc5909852714e55771c31ce7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 10 Aug 2011 11:49:04 +0200 Subject: ALSA: azt3328 - adjust error handling code to include debugging code snd_azf3328_dbgcallenter is called at the very beginning of the function, so it could be useful to call snd_azf3328_dbgcallleave at all exit points. Signed-off-by: Julia Lawall Signed-off-by: Takashi Iwai --- sound/pci/azt3328.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sound/pci/azt3328.c b/sound/pci/azt3328.c index e4d76a270c9f..579fc0dce128 100644 --- a/sound/pci/azt3328.c +++ b/sound/pci/azt3328.c @@ -2625,16 +2625,19 @@ snd_azf3328_probe(struct pci_dev *pci, const struct pci_device_id *pci_id) int err; snd_azf3328_dbgcallenter(); - if (dev >= SNDRV_CARDS) - return -ENODEV; + if (dev >= SNDRV_CARDS) { + err = -ENODEV; + goto out; + } if (!enable[dev]) { dev++; - return -ENOENT; + err = -ENOENT; + goto out; } err = snd_card_create(index[dev], id[dev], THIS_MODULE, 0, &card); if (err < 0) - return err; + goto out; strcpy(card->driver, "AZF3328"); strcpy(card->shortname, "Aztech AZF3328 (PCI168)"); -- cgit v1.2.3 From 4f6fdf08681cecd9f38499de7a02eb4f05f399a7 Mon Sep 17 00:00:00 2001 From: Chase Douglas Date: Fri, 5 Aug 2011 09:16:57 -0700 Subject: HID: magicmouse: Set resolution of touch surfaces Add touch surface resolution information. The size of the touch surfaces has been determined to the hundredth of a mm. Cc: Jiri Kosina Cc: Michael Poole Cc: linux-input@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Chase Douglas [jkosina@suse.cz: update comments and commit message] Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 56 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index 0ec91c18a421..b5bdab3299bc 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -81,6 +81,28 @@ MODULE_PARM_DESC(report_undeciphered, "Report undeciphered multi-touch state fie #define NO_TOUCHES -1 #define SINGLE_TOUCH_UP -2 +/* Touch surface information. Dimension is in hundredths of a mm, min and max + * are in units. */ +#define MOUSE_DIMENSION_X (float)9056 +#define MOUSE_MIN_X -1100 +#define MOUSE_MAX_X 1258 +#define MOUSE_RES_X ((MOUSE_MAX_X - MOUSE_MIN_X) / (MOUSE_DIMENSION_X / 100)) +#define MOUSE_DIMENSION_Y (float)5152 +#define MOUSE_MIN_Y -1589 +#define MOUSE_MAX_Y 2047 +#define MOUSE_RES_Y ((MOUSE_MAX_Y - MOUSE_MIN_Y) / (MOUSE_DIMENSION_Y / 100)) + +#define TRACKPAD_DIMENSION_X (float)13000 +#define TRACKPAD_MIN_X -2909 +#define TRACKPAD_MAX_X 3167 +#define TRACKPAD_RES_X \ + ((TRACKPAD_MAX_X - TRACKPAD_MIN_X) / (TRACKPAD_DIMENSION_X / 100)) +#define TRACKPAD_DIMENSION_Y (float)11000 +#define TRACKPAD_MIN_Y -2456 +#define TRACKPAD_MAX_Y 2565 +#define TRACKPAD_RES_Y \ + ((TRACKPAD_MAX_Y - TRACKPAD_MIN_Y) / (TRACKPAD_DIMENSION_Y / 100)) + /** * struct magicmouse_sc - Tracks Magic Mouse-specific data. * @input: Input device through which we report events. @@ -406,17 +428,31 @@ static void magicmouse_setup_input(struct input_dev *input, struct hid_device *h * inverse of the reported Y. */ if (input->id.product == USB_DEVICE_ID_APPLE_MAGICMOUSE) { - input_set_abs_params(input, ABS_MT_POSITION_X, -1100, - 1358, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -1589, - 2047, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + MOUSE_MIN_X, MOUSE_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + MOUSE_MIN_Y, MOUSE_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_MT_POSITION_X, + MOUSE_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + MOUSE_RES_Y); } else { /* USB_DEVICE_ID_APPLE_MAGICTRACKPAD */ - input_set_abs_params(input, ABS_X, -2909, 3167, 4, 0); - input_set_abs_params(input, ABS_Y, -2456, 2565, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_X, -2909, - 3167, 4, 0); - input_set_abs_params(input, ABS_MT_POSITION_Y, -2456, - 2565, 4, 0); + input_set_abs_params(input, ABS_X, TRACKPAD_MIN_X, + TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_Y, TRACKPAD_MIN_Y, + TRACKPAD_MAX_Y, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_X, + TRACKPAD_MIN_X, TRACKPAD_MAX_X, 4, 0); + input_set_abs_params(input, ABS_MT_POSITION_Y, + TRACKPAD_MIN_Y, TRACKPAD_MAX_Y, 4, 0); + + input_abs_set_res(input, ABS_X, TRACKPAD_RES_X); + input_abs_set_res(input, ABS_Y, TRACKPAD_RES_Y); + input_abs_set_res(input, ABS_MT_POSITION_X, + TRACKPAD_RES_X); + input_abs_set_res(input, ABS_MT_POSITION_Y, + TRACKPAD_RES_Y); } input_set_events_per_packet(input, 60); -- cgit v1.2.3 From 971c90bfa2f0b4fe52d6d9002178d547706f1343 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:25:35 -0700 Subject: alarmtimers: Avoid possible null pointer traversal We don't check if old_setting is non null before assigning it, so correct this. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 59f369f98a04..1dee3f62a6a7 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -479,11 +479,8 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; - /* Save old values */ - old_setting->it_interval = - ktime_to_timespec(timr->it.alarmtimer.period); - old_setting->it_value = - ktime_to_timespec(timr->it.alarmtimer.node.expires); + if (old_setting) + alarm_timer_get(timr, old_setting); /* If the timer was already set, cancel it */ alarm_cancel(&timr->it.alarmtimer); -- cgit v1.2.3 From ea7802f630d356acaf66b3c0b28c00a945fc35dc Mon Sep 17 00:00:00 2001 From: John Stultz Date: Thu, 4 Aug 2011 07:51:56 -0700 Subject: alarmtimers: Memset itimerspec passed into alarm_timer_get Following common_timer_get, zero out the itimerspec passed in. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 1dee3f62a6a7..0e9263f6fd09 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -441,6 +441,8 @@ static int alarm_timer_create(struct k_itimer *new_timer) static void alarm_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) { + memset(cur_setting, 0, sizeof(struct itimerspec)); + cur_setting->it_interval = ktime_to_timespec(timr->it.alarmtimer.period); cur_setting->it_value = -- cgit v1.2.3 From fc8ed7be738ffb1b3b0140ed2de6def38b9a7101 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2011 12:42:26 -0300 Subject: perf top browser: Remove spurious helpline update It will be immediately replaced in perf_top_browser__run. Cc: David Ahern Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-q7e2jzb44elqpkvdllk94x0i@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ui/browsers/top.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/perf/util/ui/browsers/top.c b/tools/perf/util/ui/browsers/top.c index 5a06538532af..88403cf8396a 100644 --- a/tools/perf/util/ui/browsers/top.c +++ b/tools/perf/util/ui/browsers/top.c @@ -208,6 +208,5 @@ int perf_top__tui_browser(struct perf_top *top) }, }; - ui_helpline__push("Press <- or ESC to exit"); return perf_top_browser__run(&browser); } -- cgit v1.2.3 From 6af7e471e5a7746b8024d70b4363d3dfe41d36b8 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 10 Aug 2011 10:26:09 -0700 Subject: alarmtimers: Avoid possible denial of service with high freq periodic timers Its possible to jam up the alarm timers by setting very small interval timers, which will cause the alarmtimer subsystem to spend all of its time firing and restarting timers. This can effectivly lock up a box. A deeper fix is needed, closely mimicking the hrtimer code, but for now just cap the interval to 100us to avoid userland hanging the system. CC: Thomas Gleixner CC: stable@kernel.org Signed-off-by: John Stultz --- kernel/time/alarmtimer.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 0e9263f6fd09..ea5e1a928d5b 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -481,6 +481,15 @@ static int alarm_timer_set(struct k_itimer *timr, int flags, if (!rtcdev) return -ENOTSUPP; + /* + * XXX HACK! Currently we can DOS a system if the interval + * period on alarmtimers is too small. Cap the interval here + * to 100us and solve this properly in a future patch! -jstultz + */ + if ((new_setting->it_interval.tv_sec == 0) && + (new_setting->it_interval.tv_nsec < 100000)) + new_setting->it_interval.tv_nsec = 100000; + if (old_setting) alarm_timer_get(timr, old_setting); -- cgit v1.2.3 From 15439bde3af7ff88459ea2b5520b77312e958df2 Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Fri, 5 Aug 2011 13:49:52 +0200 Subject: ALSA: snd-usb-caiaq: Correct offset fields of outbound iso_frame_desc This fixes faulty outbount packets in case the inbound packets received from the hardware are fragmented and contain bogus input iso frames. The bug has been there for ages, but for some strange reasons, it was only triggered by newer machines in 64bit mode. Signed-off-by: Daniel Mack Reported-and-tested-by: William Light Reported-by: Pedro Ribeiro Cc: stable@kernel.org Signed-off-by: Takashi Iwai --- sound/usb/caiaq/audio.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c index d0d493ca28ae..aa52b3e13bb5 100644 --- a/sound/usb/caiaq/audio.c +++ b/sound/usb/caiaq/audio.c @@ -614,6 +614,7 @@ static void read_completed(struct urb *urb) struct snd_usb_caiaqdev *dev; struct urb *out; int frame, len, send_it = 0, outframe = 0; + size_t offset = 0; if (urb->status || !info) return; @@ -634,7 +635,8 @@ static void read_completed(struct urb *urb) len = urb->iso_frame_desc[outframe].actual_length; out->iso_frame_desc[outframe].length = len; out->iso_frame_desc[outframe].actual_length = 0; - out->iso_frame_desc[outframe].offset = BYTES_PER_FRAME * frame; + out->iso_frame_desc[outframe].offset = offset; + offset += len; if (len > 0) { spin_lock(&dev->spinlock); @@ -650,7 +652,7 @@ static void read_completed(struct urb *urb) } if (send_it) { - out->number_of_packets = FRAMES_PER_URB; + out->number_of_packets = outframe; out->transfer_flags = URB_ISO_ASAP; usb_submit_urb(out, GFP_ATOMIC); } -- cgit v1.2.3 From 30eefc95841ce51c3281876f0b954dd1d3c0bd5f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 10 Aug 2011 11:22:42 -0700 Subject: xen: xen-selfballoon.c needs more header files Fix build errors (found when CONFIG_SYSFS is not enabled): drivers/xen/xen-selfballoon.c:446: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:446: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' drivers/xen/xen-selfballoon.c:446: warning: parameter names (without types) in function declaration drivers/xen/xen-selfballoon.c:485: error: expected declaration specifiers or '...' before string constant drivers/xen/xen-selfballoon.c:485: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:485: warning: type defaults to 'int' in declaration of 'MODULE_LICENSE' drivers/xen/xen-selfballoon.c:485: warning: function declaration isn't a prototype Signed-off-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-selfballoon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 1b4afd81f872..6ea852e25162 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From bf6ed027bcc93f8d54d321fe87f0434b25699eb1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 10 Aug 2011 21:11:26 +0800 Subject: rtc: ep93xx: Fix 'rtc' may be used uninitialized warning commit 92d921c5d "rtc: ep93xx: Initialize drvdata before registering device" ensures the drvdata is initialized prior to registering the rtc device. But it set the drvdata to an uninitialized pointer. Thus calling platform_get_drvdata in ep93xx_rtc_remove does not get correct address. This patch fixes below warning by adding struct rtc_device *rtc to struct ep93xx_rtc. Then set platform drvdata to ep93xx_rtc instead of rtc. CC drivers/rtc/rtc-ep93xx.o drivers/rtc/rtc-ep93xx.c: In function 'ep93xx_rtc_probe': drivers/rtc/rtc-ep93xx.c:154: warning: 'rtc' may be used uninitialized in this function Signed-off-by: Axel Lin Signed-off-by: John Stultz --- drivers/rtc/rtc-ep93xx.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c index 335551d333b2..14a42a1edc66 100644 --- a/drivers/rtc/rtc-ep93xx.c +++ b/drivers/rtc/rtc-ep93xx.c @@ -36,6 +36,7 @@ */ struct ep93xx_rtc { void __iomem *mmio_base; + struct rtc_device *rtc; }; static int ep93xx_rtc_get_swcomp(struct device *dev, unsigned short *preload, @@ -130,7 +131,6 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) { struct ep93xx_rtc *ep93xx_rtc; struct resource *res; - struct rtc_device *rtc; int err; ep93xx_rtc = devm_kzalloc(&pdev->dev, sizeof(*ep93xx_rtc), GFP_KERNEL); @@ -151,12 +151,12 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return -ENXIO; pdev->dev.platform_data = ep93xx_rtc; - platform_set_drvdata(pdev, rtc); + platform_set_drvdata(pdev, ep93xx_rtc); - rtc = rtc_device_register(pdev->name, + ep93xx_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE); - if (IS_ERR(rtc)) { - err = PTR_ERR(rtc); + if (IS_ERR(ep93xx_rtc->rtc)) { + err = PTR_ERR(ep93xx_rtc->rtc); goto exit; } @@ -167,7 +167,7 @@ static int __init ep93xx_rtc_probe(struct platform_device *pdev) return 0; fail: - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); exit: platform_set_drvdata(pdev, NULL); pdev->dev.platform_data = NULL; @@ -176,11 +176,11 @@ exit: static int __exit ep93xx_rtc_remove(struct platform_device *pdev) { - struct rtc_device *rtc = platform_get_drvdata(pdev); + struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev); sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files); platform_set_drvdata(pdev, NULL); - rtc_device_unregister(rtc); + rtc_device_unregister(ep93xx_rtc->rtc); pdev->dev.platform_data = NULL; return 0; -- cgit v1.2.3 From dec35d19c4ec65b94df3b27b6e373f0d48c9cd32 Mon Sep 17 00:00:00 2001 From: Ilkka Koskinen Date: Wed, 16 Mar 2011 06:07:14 +0000 Subject: rtc: rtc-twl: Switch to using threaded irq The driver is accessing to i2c bus in interrupt handler. Therefore, it should use threaded irq. Signed-off-by: Ilkka Koskinen Acked-by: Balaji T K Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 9a81f778d6b2..1963cddbf214 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -462,7 +462,7 @@ static int __devinit twl_rtc_probe(struct platform_device *pdev) if (ret < 0) goto out1; - ret = request_irq(irq, twl_rtc_interrupt, + ret = request_threaded_irq(irq, NULL, twl_rtc_interrupt, IRQF_TRIGGER_RISING, dev_name(&rtc->dev), rtc); if (ret < 0) { -- cgit v1.2.3 From 34d623d11316cb69f9e8cc5eb50d3792b5c302b6 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 31 May 2011 08:51:39 +0000 Subject: rtc: rtc-twl: Remove lockdep related local_irq_enable() Now that the irq is properly threaded (due to it needing i2c access) we should also remove the local_irq_enable() call in twl_rtc_interrupt. Testing this with Pandaboard, the RTC is still working. [Reworked commit message -jstultz] Signed-off-by: John Stultz --- drivers/rtc/rtc-twl.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/rtc/rtc-twl.c b/drivers/rtc/rtc-twl.c index 1963cddbf214..9677bbc433f9 100644 --- a/drivers/rtc/rtc-twl.c +++ b/drivers/rtc/rtc-twl.c @@ -362,14 +362,6 @@ static irqreturn_t twl_rtc_interrupt(int irq, void *rtc) int res; u8 rd_reg; -#ifdef CONFIG_LOCKDEP - /* WORKAROUND for lockdep forcing IRQF_DISABLED on us, which - * we don't want and can't tolerate. Although it might be - * friendlier not to borrow this thread context... - */ - local_irq_enable(); -#endif - res = twl_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); if (res) goto out; -- cgit v1.2.3 From 938f97bcf1bdd1b681d5d14d1d7117a2e22d4434 Mon Sep 17 00:00:00 2001 From: John Stultz Date: Fri, 22 Jul 2011 09:12:51 +0000 Subject: rtc: Fix RTC PIE frequency limit Thomas earlier submitted a fix to limit the RTC PIE freq, but picked 5000Hz out of the air. Willy noticed that we should instead use the 8192Hz max from the rtc man documentation. Cc: Willy Tarreau Cc: stable@kernel.org Cc: Thomas Gleixner Signed-off-by: John Stultz --- drivers/rtc/interface.c | 2 +- include/linux/rtc.h | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c index 3195dbd3ec34..eb4c88316a15 100644 --- a/drivers/rtc/interface.c +++ b/drivers/rtc/interface.c @@ -708,7 +708,7 @@ int rtc_irq_set_freq(struct rtc_device *rtc, struct rtc_task *task, int freq) int err = 0; unsigned long flags; - if (freq <= 0 || freq > 5000) + if (freq <= 0 || freq > RTC_MAX_FREQ) return -EINVAL; retry: spin_lock_irqsave(&rtc->irq_task_lock, flags); diff --git a/include/linux/rtc.h b/include/linux/rtc.h index b27ebea25660..93f4d035076b 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -97,6 +97,9 @@ struct rtc_pll_info { #define RTC_AF 0x20 /* Alarm interrupt */ #define RTC_UF 0x10 /* Update interrupt for 1Hz RTC */ + +#define RTC_MAX_FREQ 8192 + #ifdef __KERNEL__ #include -- cgit v1.2.3 From 280ec8b718e8565333ace339d6bba91239440b20 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 10 Aug 2011 22:19:19 +0900 Subject: ASoC: Add missing break in WM8994 probe This error would have no effect on current silicon revisions, the fall through case has the same behaviour. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8994.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 09e680ae88b2..b393f9fac97a 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -2981,6 +2981,7 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) wm8994->hubs.dcs_readback_mode = 1; break; } + break; case WM8958: wm8994->hubs.dcs_readback_mode = 1; -- cgit v1.2.3 From feb00dceb5af57ce34514ce66096b32d133ded3d Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 11 Aug 2011 12:23:22 +0900 Subject: ASoC: Terminate WM8750 SPI device ID table Signed-off-by: Mark Brown Reported-by: Stephen Rothwell --- sound/soc/codecs/wm8750.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index e6f47f49357d..82ac5fcaa2b2 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -781,6 +781,7 @@ static int __devexit wm8750_spi_remove(struct spi_device *spi) static const struct spi_device_id wm8750_spi_ids[] = { { "wm8750", 0 }, { "wm8987", 0 }, + { 0, 0 }, }; MODULE_DEVICE_TABLE(spi, wm8750_spi_ids); -- cgit v1.2.3 From 8e4bf84474960e832b56293c9b0674c88b5b05ce Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 11 Aug 2011 10:36:03 +0200 Subject: Move some REQ flags to the common bio/request area REQ_SECURE, REQ_FLUSH and REQ_FUA may all be set on a bio as well as on a request, so relocate them to the shared part of the enum. Signed-off-by: Matthew Wilcox Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6395692b2e7a..32f0076e844b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -125,7 +125,11 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_DISCARD, /* request to discard sectors */ + __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_FUA, /* forced unit access */ + __REQ_FLUSH, /* request for cache flush */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -135,7 +139,6 @@ enum rq_flag_bits { /* request only flags */ __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_FUA, /* forced unit access */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ __REQ_DONTPREP, /* don't call prep for this one */ @@ -146,11 +149,9 @@ enum rq_flag_bits { __REQ_PREEMPT, /* set for "ide_preempt" requests */ __REQ_ALLOCED, /* request came from our alloc pool */ __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_FLUSH, /* request for cache flush */ __REQ_FLUSH_SEQ, /* request for flush sequence */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ __REQ_NR_BITS, /* stops here */ }; -- cgit v1.2.3 From c09c47caedc9854d59378d6e34c989e51cfdd2b4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 11 Aug 2011 10:36:05 +0200 Subject: blktrace: add FLUSH/FUA support Add FLUSH/FUA support to blktrace. As FLUSH precedes WRITE and/or FUA follows WRITE, use the same 'F' flag for both cases and distinguish them by their (relative) position. The end results look like (other flags might be shown also): - WRITE: W - WRITE_FLUSH: FW - WRITE_FUA: WF - WRITE_FLUSH_FUA: FWF Note that we reuse TC_BARRIER due to lack of bit space of act_mask so that the older versions of blktrace tools will report flush requests as barriers from now on. Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Signed-off-by: Namhyung Kim Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 5 +++-- include/trace/events/block.h | 20 +++++++++++--------- kernel/trace/blktrace.c | 21 ++++++++++++++++----- 3 files changed, 30 insertions(+), 16 deletions(-) diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 8c7c2de7631a..8e9e4bc6d73b 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -14,7 +14,7 @@ enum blktrace_cat { BLK_TC_READ = 1 << 0, /* reads */ BLK_TC_WRITE = 1 << 1, /* writes */ - BLK_TC_BARRIER = 1 << 2, /* barrier */ + BLK_TC_FLUSH = 1 << 2, /* flush */ BLK_TC_SYNC = 1 << 3, /* sync IO */ BLK_TC_SYNCIO = BLK_TC_SYNC, BLK_TC_QUEUE = 1 << 4, /* queueing/merging */ @@ -28,8 +28,9 @@ enum blktrace_cat { BLK_TC_META = 1 << 12, /* metadata */ BLK_TC_DISCARD = 1 << 13, /* discard requests */ BLK_TC_DRV_DATA = 1 << 14, /* binary per-driver data */ + BLK_TC_FUA = 1 << 15, /* fua requests */ - BLK_TC_END = 1 << 15, /* only 16-bits, reminder */ + BLK_TC_END = 1 << 15, /* we've run out of bits! */ }; #define BLK_TC_SHIFT (16) diff --git a/include/trace/events/block.h b/include/trace/events/block.h index bf366547da25..05c5e61f0a7c 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -8,6 +8,8 @@ #include #include +#define RWBS_LEN 8 + DECLARE_EVENT_CLASS(block_rq_with_error, TP_PROTO(struct request_queue *q, struct request *rq), @@ -19,7 +21,7 @@ DECLARE_EVENT_CLASS(block_rq_with_error, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( int, errors ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -104,7 +106,7 @@ DECLARE_EVENT_CLASS(block_rq, __field( sector_t, sector ) __field( unsigned int, nr_sector ) __field( unsigned int, bytes ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) __dynamic_array( char, cmd, blk_cmd_buf_len(rq) ) ), @@ -183,7 +185,7 @@ TRACE_EVENT(block_bio_bounce, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -222,7 +224,7 @@ TRACE_EVENT(block_bio_complete, __field( sector_t, sector ) __field( unsigned, nr_sector ) __field( int, error ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -249,7 +251,7 @@ DECLARE_EVENT_CLASS(block_bio, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -321,7 +323,7 @@ DECLARE_EVENT_CLASS(block_get_rq, __field( dev_t, dev ) __field( sector_t, sector ) __field( unsigned int, nr_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -456,7 +458,7 @@ TRACE_EVENT(block_split, __field( dev_t, dev ) __field( sector_t, sector ) __field( sector_t, new_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN ) __array( char, comm, TASK_COMM_LEN ) ), @@ -498,7 +500,7 @@ TRACE_EVENT(block_bio_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( @@ -542,7 +544,7 @@ TRACE_EVENT(block_rq_remap, __field( unsigned int, nr_sector ) __field( dev_t, old_dev ) __field( sector_t, old_sector ) - __array( char, rwbs, 6 ) + __array( char, rwbs, RWBS_LEN) ), TP_fast_assign( diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 6957aa298dfa..7c910a5593a6 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -206,6 +206,8 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(rw, RAHEAD); what |= MASK_TC_BIT(rw, META); what |= MASK_TC_BIT(rw, DISCARD); + what |= MASK_TC_BIT(rw, FLUSH); + what |= MASK_TC_BIT(rw, FUA); pid = tsk->pid; if (act_log_check(bt, what, sector, pid)) @@ -1054,6 +1056,9 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) goto out; } + if (tc & BLK_TC_FLUSH) + rwbs[i++] = 'F'; + if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; else if (tc & BLK_TC_WRITE) @@ -1063,10 +1068,10 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) else rwbs[i++] = 'N'; + if (tc & BLK_TC_FUA) + rwbs[i++] = 'F'; if (tc & BLK_TC_AHEAD) rwbs[i++] = 'A'; - if (tc & BLK_TC_BARRIER) - rwbs[i++] = 'B'; if (tc & BLK_TC_SYNC) rwbs[i++] = 'S'; if (tc & BLK_TC_META) @@ -1132,7 +1137,7 @@ typedef int (blk_log_action_t) (struct trace_iterator *iter, const char *act); static int blk_log_action_classic(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; unsigned long long ts = iter->ts; unsigned long nsec_rem = do_div(ts, NSEC_PER_SEC); unsigned secs = (unsigned long)ts; @@ -1148,7 +1153,7 @@ static int blk_log_action_classic(struct trace_iterator *iter, const char *act) static int blk_log_action(struct trace_iterator *iter, const char *act) { - char rwbs[6]; + char rwbs[RWBS_LEN]; const struct blk_io_trace *t = te_blk_io_trace(iter->ent); fill_rwbs(rwbs, t); @@ -1561,7 +1566,7 @@ static const struct { } mask_maps[] = { { BLK_TC_READ, "read" }, { BLK_TC_WRITE, "write" }, - { BLK_TC_BARRIER, "barrier" }, + { BLK_TC_FLUSH, "flush" }, { BLK_TC_SYNC, "sync" }, { BLK_TC_QUEUE, "queue" }, { BLK_TC_REQUEUE, "requeue" }, @@ -1573,6 +1578,7 @@ static const struct { { BLK_TC_META, "meta" }, { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, + { BLK_TC_FUA, "fua" }, }; static int blk_trace_str2mask(const char *str) @@ -1788,6 +1794,9 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) { int i = 0; + if (rw & REQ_FLUSH) + rwbs[i++] = 'F'; + if (rw & WRITE) rwbs[i++] = 'W'; else if (rw & REQ_DISCARD) @@ -1797,6 +1806,8 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes) else rwbs[i++] = 'N'; + if (rw & REQ_FUA) + rwbs[i++] = 'F'; if (rw & REQ_RAHEAD) rwbs[i++] = 'A'; if (rw & REQ_SYNC) -- cgit v1.2.3 From bcf30e75b773b60379338768677a1301ef602ff9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 11 Aug 2011 10:39:04 +0200 Subject: block: improve rq_affinity placement This patch reverts commit 35ae66e0a09ab70ed(block: Make rq_affinity = 1 work as expected). The purpose is to avoid an unnecessary IPI. Let's take an example. My test box has cpu 0-7, one socket. Say request is added from CPU 1, blk_complete_request() occurs at CPU 7. Without the reverted patch, softirq will be done at CPU 7. With it, an IPI will be directed to CPU 0, and softirq will be done at CPU 0. In this case, doing softirq at CPU 0 and CPU 7 have no difference from cache sharing point view and we can avoid an ipi if doing it in CPU 7. An immediate concern is this is just like QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is running in interrupt handler, and currently I/O controller doesn't support multiple interrupts (I checked several LSI cards and AHCI), so only one CPU can run blk_complete_request(). This is still quite different as QUEUE_FLAG_SAME_FORCE. Since only one CPU runs softirq, the only difference with below patch is softirq not always runs at the first CPU of a group. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-softirq.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 487addc85bb5..58340d0cb23a 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -103,7 +103,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { void __blk_complete_request(struct request *req) { - int ccpu, cpu; + int ccpu, cpu, group_cpu = NR_CPUS; struct request_queue *q = req->q; unsigned long flags; @@ -117,12 +117,22 @@ void __blk_complete_request(struct request *req) */ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { ccpu = req->cpu; - if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) + if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { ccpu = blk_cpu_to_group(ccpu); + group_cpu = blk_cpu_to_group(cpu); + } } else ccpu = cpu; - if (ccpu == cpu) { + /* + * If current CPU and requested CPU are in the same group, running + * softirq in current CPU. One might concern this is just like + * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is + * running in interrupt handler, and currently I/O controller doesn't + * support multiple interrupts, so current CPU is unique actually. This + * avoids IPI sending from current CPU to the first CPU of a group. + */ + if (ccpu == cpu || ccpu == group_cpu) { struct list_head *list; do_local: list = &__get_cpu_var(blk_cpu_done); -- cgit v1.2.3 From f57b05ed532ccf3b3e22878a5678ca10de50ad29 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 1 Jun 2011 21:43:46 +0200 Subject: perf report: Use properly build_id kernel binaries If we bring the recorded perf data together with kernel binary from another machine using: on server A: perf archive on server B: tar xjvf perf.data.tar.bz2 -C ~/.debug the build_id kernel dso is not properly recognized during the "perf report" command on server B. The reason is, that build_id dsos are added during the session initialization, while the kernel maps are created during the sample event processing. The machine__create_kernel_maps functions ends up creating new dso object for kernel, but it does not check if we already have one added by build_id processing. Also the build_id reading ABI quirk added in commit: - commit b25114817a73bbd2b84ce9dba02ee1ef8989a947 perf build-id: Add quirk to deal with perf.data file format breakage populates the "struct build_id_event::pid" with 0, which is later interpreted as DEFAULT_GUEST_KERNEL_ID. This is not always correct, so it's better to guess the pid value based on the "struct build_id_event::header::misc" value. - Tested with data generated on x86 kernel version v2.6.34 and reported back on x86_64 current kernel. - Not tested for guest kernel case. Note the problem stays for PERF_RECORD_MMAP events recorded by perf that does not use proper pid (HOST_KERNEL_ID/DEFAULT_GUEST_KERNEL_ID). They are misinterpreted within the current perf code. Probably there's not much we can do about that. Cc: Avi Kivity Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20110601194346.GB1934@jolsa.brq.redhat.com Signed-off-by: Jiri Olsa Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 11 +++++++++- tools/perf/util/symbol.c | 57 +++++++++++++++++++++++++++--------------------- tools/perf/util/symbol.h | 1 - 3 files changed, 42 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index d4f3101773db..b6c1ad123ca9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -726,7 +726,16 @@ static int perf_header__read_build_ids_abi_quirk(struct perf_header *header, return -1; bev.header = old_bev.header; - bev.pid = 0; + + /* + * As the pid is the missing value, we need to fill + * it properly. The header.misc value give us nice hint. + */ + bev.pid = HOST_KERNEL_ID; + if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER || + bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL) + bev.pid = DEFAULT_GUEST_KERNEL_ID; + memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id)); __event_process_build_id(&bev, filename, session); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a8b53714542a..e142c21ae9a5 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -2181,27 +2181,22 @@ size_t machines__fprintf_dsos_buildid(struct rb_root *machines, return ret; } -struct dso *dso__new_kernel(const char *name) +static struct dso* +dso__kernel_findnew(struct machine *machine, const char *name, + const char *short_name, int dso_type) { - struct dso *dso = dso__new(name ?: "[kernel.kallsyms]"); - - if (dso != NULL) { - dso__set_short_name(dso, "[kernel]"); - dso->kernel = DSO_TYPE_KERNEL; - } - - return dso; -} + /* + * The kernel dso could be created by build_id processing. + */ + struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name); -static struct dso *dso__new_guest_kernel(struct machine *machine, - const char *name) -{ - char bf[PATH_MAX]; - struct dso *dso = dso__new(name ?: machine__mmap_name(machine, bf, - sizeof(bf))); + /* + * We need to run this in all cases, since during the build_id + * processing we had no idea this was the kernel dso. + */ if (dso != NULL) { - dso__set_short_name(dso, "[guest.kernel]"); - dso->kernel = DSO_TYPE_GUEST_KERNEL; + dso__set_short_name(dso, short_name); + dso->kernel = dso_type; } return dso; @@ -2219,24 +2214,36 @@ void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine) dso->has_build_id = true; } -static struct dso *machine__create_kernel(struct machine *machine) +static struct dso *machine__get_kernel(struct machine *machine) { const char *vmlinux_name = NULL; struct dso *kernel; if (machine__is_host(machine)) { vmlinux_name = symbol_conf.vmlinux_name; - kernel = dso__new_kernel(vmlinux_name); + if (!vmlinux_name) + vmlinux_name = "[kernel.kallsyms]"; + + kernel = dso__kernel_findnew(machine, vmlinux_name, + "[kernel]", + DSO_TYPE_KERNEL); } else { + char bf[PATH_MAX]; + if (machine__is_default_guest(machine)) vmlinux_name = symbol_conf.default_guest_vmlinux_name; - kernel = dso__new_guest_kernel(machine, vmlinux_name); + if (!vmlinux_name) + vmlinux_name = machine__mmap_name(machine, bf, + sizeof(bf)); + + kernel = dso__kernel_findnew(machine, vmlinux_name, + "[guest.kernel]", + DSO_TYPE_GUEST_KERNEL); } - if (kernel != NULL) { + if (kernel != NULL && (!kernel->has_build_id)) dso__read_running_kernel_build_id(kernel, machine); - dsos__add(&machine->kernel_dsos, kernel); - } + return kernel; } @@ -2340,7 +2347,7 @@ void machine__destroy_kernel_maps(struct machine *machine) int machine__create_kernel_maps(struct machine *machine) { - struct dso *kernel = machine__create_kernel(machine); + struct dso *kernel = machine__get_kernel(machine); if (kernel == NULL || __machine__create_kernel_maps(machine, kernel) < 0) diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 325ee36a9d29..4f377d92e75a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -155,7 +155,6 @@ struct dso { }; struct dso *dso__new(const char *name); -struct dso *dso__new_kernel(const char *name); void dso__delete(struct dso *dso); int dso__name_len(const struct dso *dso); -- cgit v1.2.3 From b3b46d76d0fcbb1f737107cec1a1ee87bc5e5fd3 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Thu, 11 Aug 2011 12:06:28 -0400 Subject: APEI: Fix WHEA _OSC call Bit 0 of the support parameter to the OSC call should be set in order to indicate that the OS supports the WHEA mechanism. Stuart Hayes tracked an APEI issue on some Dell platforms down to this. Reported-by: Stuart Hayes Signed-off-by: Matthew Garrett Signed-off-by: Len Brown --- drivers/acpi/apei/apei-base.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 8041248fce9b..61540360d5ce 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -618,7 +618,7 @@ int apei_osc_setup(void) }; capbuf[OSC_QUERY_TYPE] = OSC_QUERY_ENABLE; - capbuf[OSC_SUPPORT_TYPE] = 0; + capbuf[OSC_SUPPORT_TYPE] = 1; capbuf[OSC_CONTROL_TYPE] = 0; if (ACPI_FAILURE(acpi_get_handle(NULL, "\\_SB", &handle)) -- cgit v1.2.3 From d9b830fa444c1f4955d0ee88f5af2aa24d2c7837 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 11 Aug 2011 09:19:29 -0700 Subject: Input: mpu3050 - correct call to input_free_device input_free_device() should be used if input_register_device() was not called yet or if it failed. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/misc/mpu3050.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c index b95fac15b2ea..f71dc728da58 100644 --- a/drivers/input/misc/mpu3050.c +++ b/drivers/input/misc/mpu3050.c @@ -282,7 +282,7 @@ err_free_irq: err_pm_set_suspended: pm_runtime_set_suspended(&client->dev); err_free_mem: - input_unregister_device(idev); + input_free_device(idev); kfree(sensor); return error; } -- cgit v1.2.3 From 22f83205e59c97c2460ad8e4bd6e71268cb2f37f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 11 Aug 2011 09:22:45 -0700 Subject: Input: tegra-kbc - correct call to input_free_device If kzalloc for kbc fails, then we have NULL pointer dereference while calling input_free_device(kbc->idev) in the error handling. So it is safer to always use the original name, input_dev. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/tegra-kbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index f270447ba951..a5a77915c650 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -702,7 +702,7 @@ err_iounmap: err_free_mem_region: release_mem_region(res->start, resource_size(res)); err_free_mem: - input_free_device(kbc->idev); + input_free_device(input_dev); kfree(kbc); return err; -- cgit v1.2.3 From 044cd3a574be5cd97ab80d0c6d06f5fab327541d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 29 Jul 2011 22:08:07 -0700 Subject: hwmon: (pmbus) Virtualize pmbus_write_byte With virtual pages and to be able to handle more chips, it is necessary to virtualise pmbus_write_byte(). Signed-off-by: Guenter Roeck Reviewed-by: Robert Coulson --- drivers/hwmon/pmbus/pmbus.h | 1 + drivers/hwmon/pmbus/pmbus_core.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/hwmon/pmbus/pmbus.h b/drivers/hwmon/pmbus/pmbus.h index 0808d986d75b..a6ae20ffef6b 100644 --- a/drivers/hwmon/pmbus/pmbus.h +++ b/drivers/hwmon/pmbus/pmbus.h @@ -325,6 +325,7 @@ struct pmbus_driver_info { int (*read_word_data)(struct i2c_client *client, int page, int reg); int (*write_word_data)(struct i2c_client *client, int page, int reg, u16 word); + int (*write_byte)(struct i2c_client *client, int page, u8 value); /* * The identify function determines supported PMBus functionality. * This function is only necessary if a chip driver supports multiple diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index 5c1b6cf31701..a561c3a0e916 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -182,6 +182,24 @@ int pmbus_write_byte(struct i2c_client *client, int page, u8 value) } EXPORT_SYMBOL_GPL(pmbus_write_byte); +/* + * _pmbus_write_byte() is similar to pmbus_write_byte(), but checks if + * a device specific mapping funcion exists and calls it if necessary. + */ +static int _pmbus_write_byte(struct i2c_client *client, int page, u8 value) +{ + struct pmbus_data *data = i2c_get_clientdata(client); + const struct pmbus_driver_info *info = data->info; + int status; + + if (info->write_byte) { + status = info->write_byte(client, page, value); + if (status != -ENODATA) + return status; + } + return pmbus_write_byte(client, page, value); +} + int pmbus_write_word_data(struct i2c_client *client, u8 page, u8 reg, u16 word) { int rv; @@ -281,7 +299,7 @@ static int _pmbus_read_byte_data(struct i2c_client *client, int page, int reg) static void pmbus_clear_fault_page(struct i2c_client *client, int page) { - pmbus_write_byte(client, page, PMBUS_CLEAR_FAULTS); + _pmbus_write_byte(client, page, PMBUS_CLEAR_FAULTS); } void pmbus_clear_faults(struct i2c_client *client) -- cgit v1.2.3 From 3a2805e845761ea76a6ad5688d637b2624de0cab Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 29 Jul 2011 23:05:25 -0700 Subject: hwmon: (pmbus/lm25066) Ignore byte writes to non-zero pages pmbus_clear_faults() attempts to clear faults on non-existing real pages. As a result, the command error bit in the status register is set, and faults are not really cleared. All byte writes to non-zero pages are requests to clear the status register on that page. Since non-zero pages are virtual and do not exist on the chip, there is nothing to do, and such requests have to be ignored. This fixes above problem. Signed-off-by: Guenter Roeck Reviewed-by: Robert Coulson --- drivers/hwmon/pmbus/lm25066.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c index d4bc114572de..ac254fba551b 100644 --- a/drivers/hwmon/pmbus/lm25066.c +++ b/drivers/hwmon/pmbus/lm25066.c @@ -161,6 +161,17 @@ static int lm25066_write_word_data(struct i2c_client *client, int page, int reg, return ret; } +static int lm25066_write_byte(struct i2c_client *client, int page, u8 value) +{ + if (page > 1) + return -EINVAL; + + if (page == 0) + return pmbus_write_byte(client, 0, value); + + return 0; +} + static int lm25066_probe(struct i2c_client *client, const struct i2c_device_id *id) { @@ -204,6 +215,7 @@ static int lm25066_probe(struct i2c_client *client, info->read_word_data = lm25066_read_word_data; info->write_word_data = lm25066_write_word_data; + info->write_byte = lm25066_write_byte; switch (id->driver_data) { case lm25066: -- cgit v1.2.3 From 66a89b2164e2d30661edbd1953eacf0594d8203a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 9 Aug 2011 11:10:56 -0400 Subject: hwmon: (ibmaem) add missing kfree rs_resp is dynamically allocated in aem_read_sensor(), so it should be freed before exiting in every case. This collects the kfree and the return at the end of the function. Signed-off-by: Julia Lawall Signed-off-by: Guenter Roeck Cc: stable@kernel.org # 2.6.27+ --- drivers/hwmon/ibmaem.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/hwmon/ibmaem.c b/drivers/hwmon/ibmaem.c index 1a409c5bc9bc..c316294c48b4 100644 --- a/drivers/hwmon/ibmaem.c +++ b/drivers/hwmon/ibmaem.c @@ -432,13 +432,15 @@ static int aem_read_sensor(struct aem_data *data, u8 elt, u8 reg, aem_send_message(ipmi); res = wait_for_completion_timeout(&ipmi->read_complete, IPMI_TIMEOUT); - if (!res) - return -ETIMEDOUT; + if (!res) { + res = -ETIMEDOUT; + goto out; + } if (ipmi->rx_result || ipmi->rx_msg_len != rs_size || memcmp(&rs_resp->id, &system_x_id, sizeof(system_x_id))) { - kfree(rs_resp); - return -ENOENT; + res = -ENOENT; + goto out; } switch (size) { @@ -463,8 +465,11 @@ static int aem_read_sensor(struct aem_data *data, u8 elt, u8 reg, break; } } + res = 0; - return 0; +out: + kfree(rs_resp); + return res; } /* Update AEM energy registers */ -- cgit v1.2.3 From 789e66612367f9975d704c9e4990025cbbbb45ec Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 9 Aug 2011 18:44:44 +0000 Subject: [CIFS] Cleanup use of CONFIG_CIFS_STATS2 ifdef to make transport routines more readable Christoph had requested that the stats related code (in CONFIG_CIFS_STATS2) be moved into helpers to make code flow more readable. This patch should help. For example the following section from transport.c spin_unlock(&GlobalMid_Lock); atomic_inc(&ses->server->num_waiters); wait_event(ses->server->request_q, atomic_read(&ses->server->inFlight) < cifs_max_pending); atomic_dec(&ses->server->num_waiters); spin_lock(&GlobalMid_Lock); becomes simpler (with the patch below): spin_unlock(&GlobalMid_Lock); cifs_num_waiters_inc(server); wait_event(server->request_q, atomic_read(&server->inFlight) < cifs_max_pending); cifs_num_waiters_dec(server); spin_lock(&GlobalMid_Lock); Reviewed-by: Jeff Layton CC: Christoph Hellwig Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/cifs_debug.c | 2 +- fs/cifs/cifsglob.h | 56 +++++++++++++++++++++++++++++++++++++++++++++------- fs/cifs/transport.c | 51 ++++++++++++++++------------------------------- 3 files changed, 67 insertions(+), 42 deletions(-) diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 2fe3cf13b2e9..6d40656e1e29 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -176,7 +176,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) #ifdef CONFIG_CIFS_STATS2 seq_printf(m, " In Send: %d In MaxReq Wait: %d", - atomic_read(&server->inSend), + atomic_read(&server->in_send), atomic_read(&server->num_waiters)); #endif diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 38ce6d44b145..95dad9d14cf1 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -291,7 +291,7 @@ struct TCP_Server_Info { struct fscache_cookie *fscache; /* client index cache cookie */ #endif #ifdef CONFIG_CIFS_STATS2 - atomic_t inSend; /* requests trying to send */ + atomic_t in_send; /* requests trying to send */ atomic_t num_waiters; /* blocked waiting to get in sendrecv */ #endif }; @@ -672,12 +672,54 @@ struct mid_q_entry { bool multiEnd:1; /* both received */ }; -struct oplock_q_entry { - struct list_head qhead; - struct inode *pinode; - struct cifs_tcon *tcon; - __u16 netfid; -}; +/* Make code in transport.c a little cleaner by moving + update of optional stats into function below */ +#ifdef CONFIG_CIFS_STATS2 + +static inline void cifs_in_send_inc(struct TCP_Server_Info *server) +{ + atomic_inc(&server->in_send); +} + +static inline void cifs_in_send_dec(struct TCP_Server_Info *server) +{ + atomic_dec(&server->in_send); +} + +static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) +{ + atomic_inc(&server->num_waiters); +} + +static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) +{ + atomic_dec(&server->num_waiters); +} + +static inline void cifs_save_when_sent(struct mid_q_entry *mid) +{ + mid->when_sent = jiffies; +} +#else +static inline void cifs_in_send_inc(struct TCP_Server_Info *server) +{ +} +static inline void cifs_in_send_dec(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) +{ +} + +static inline void cifs_save_when_sent(struct mid_q_entry *mid) +{ +} +#endif /* for pending dnotify requests */ struct dir_notify_req { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index c1b9c4b10739..10ca6b2c26b7 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -266,15 +266,11 @@ static int wait_for_free_request(struct TCP_Server_Info *server, while (1) { if (atomic_read(&server->inFlight) >= cifs_max_pending) { spin_unlock(&GlobalMid_Lock); -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&server->num_waiters); -#endif + cifs_num_waiters_inc(server); wait_event(server->request_q, atomic_read(&server->inFlight) < cifs_max_pending); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&server->num_waiters); -#endif + cifs_num_waiters_dec(server); spin_lock(&GlobalMid_Lock); } else { if (server->tcpStatus == CifsExiting) { @@ -381,15 +377,13 @@ cifs_call_async(struct TCP_Server_Info *server, struct kvec *iov, mid->callback = callback; mid->callback_data = cbdata; mid->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&server->inSend); -#endif + + cifs_in_send_inc(server); rc = smb_sendv(server, iov, nvec); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&server->inSend); - mid->when_sent = jiffies; -#endif + cifs_in_send_dec(server); + cifs_save_when_sent(mid); mutex_unlock(&server->srv_mutex); + if (rc) goto out_err; @@ -575,14 +569,10 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + cifs_in_send_inc(ses->server); rc = smb_sendv(ses->server, iov, n_vec); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); @@ -703,14 +693,11 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + + cifs_in_send_inc(ses->server); rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); if (rc < 0) @@ -843,14 +830,10 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon, } midQ->midState = MID_REQUEST_SUBMITTED; -#ifdef CONFIG_CIFS_STATS2 - atomic_inc(&ses->server->inSend); -#endif + cifs_in_send_inc(ses->server); rc = smb_send(ses->server, in_buf, be32_to_cpu(in_buf->smb_buf_length)); -#ifdef CONFIG_CIFS_STATS2 - atomic_dec(&ses->server->inSend); - midQ->when_sent = jiffies; -#endif + cifs_in_send_dec(ses->server); + cifs_save_when_sent(midQ); mutex_unlock(&ses->server->srv_mutex); if (rc < 0) { -- cgit v1.2.3 From e22906c564c2f9c73ee4621ef3b93fe374539f00 Mon Sep 17 00:00:00 2001 From: Shirish Pargaonkar Date: Tue, 9 Aug 2011 14:30:39 -0500 Subject: cifs: Do not set cifs/ntfs acl using a file handle (try #4) Set security descriptor using path name instead of a file handle. We can't be sure that the file handle has adequate permission to set a security descriptor (to modify DACL). Function set_cifs_acl_by_fid() has been removed since we can't be sure how a file was opened for writing, a valid request can fail if the file was not opened with two above mentioned permissions. We could have opted to add on WRITE_DAC and WRITE_OWNER permissions to file opens and then use that file handle but adding addtional permissions such as WRITE_DAC and WRITE_OWNER could cause an any open to fail. And it was incorrect to look for read file handle to set a security descriptor anyway. Signed-off-by: Shirish Pargaonkar Signed-off-by: Steve French --- fs/cifs/cifsacl.c | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 21de1d6d5849..d0f59faefb78 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -991,24 +991,6 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, return pntsd; } -static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, - struct cifs_ntsd *pnntsd, u32 acllen) -{ - int xid, rc; - struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); - - if (IS_ERR(tlink)) - return PTR_ERR(tlink); - - xid = GetXid(); - rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); - FreeXid(xid); - cifs_put_tlink(tlink); - - cFYI(DBG2, "SetCIFSACL rc = %d", rc); - return rc; -} - static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, struct cifs_ntsd *pnntsd, u32 acllen) { @@ -1047,18 +1029,10 @@ int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, struct inode *inode, const char *path) { struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); - struct cifsFileInfo *open_file; - int rc; cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); - open_file = find_readable_file(CIFS_I(inode), true); - if (!open_file) - return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); - - rc = set_cifs_acl_by_fid(cifs_sb, open_file->netfid, pnntsd, acllen); - cifsFileInfo_put(open_file); - return rc; + return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ -- cgit v1.2.3 From 4b1bfb7d2d125af6653d6c2305356b2677f79dc6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:22 +0200 Subject: rt2x00: fix crash in rt2800usb_write_tx_desc Patch should fix this oops: BUG: unable to handle kernel NULL pointer dereference at 000000a0 IP: [] rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb] *pdpt = 000000002408c001 *pde = 0000000024079067 *pte = 0000000000000000 Oops: 0000 [#1] SMP EIP: 0060:[] EFLAGS: 00010282 CPU: 0 EIP is at rt2800usb_write_tx_desc+0x18/0xc0 [rt2800usb] EAX: 00000035 EBX: ef2bef10 ECX: 00000000 EDX: d40958a0 ESI: ef1865f8 EDI: ef1865f8 EBP: d4095878 ESP: d409585c DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Call Trace: [] rt2x00queue_write_tx_frame+0x155/0x300 [rt2x00lib] [] rt2x00mac_tx+0x7c/0x370 [rt2x00lib] [] ? mark_held_locks+0x62/0x90 [] ? _raw_spin_unlock_irqrestore+0x35/0x60 [] ? trace_hardirqs_on_caller+0x5a/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] __ieee80211_tx+0x5c/0x1e0 [mac80211] [] ieee80211_tx+0xbc/0xe0 [mac80211] [] ? ieee80211_tx+0x23/0xe0 [mac80211] [] ieee80211_xmit+0xc1/0x200 [mac80211] [] ? ieee80211_tx+0xe0/0xe0 [mac80211] [] ? lock_release_holdtime+0x35/0x1b0 [] ? ieee80211_subif_start_xmit+0x446/0x5f0 [mac80211] [] ieee80211_subif_start_xmit+0x29d/0x5f0 [mac80211] [] ? ieee80211_subif_start_xmit+0x3e4/0x5f0 [mac80211] [] ? sock_setsockopt+0x6a8/0x6f0 [] ? sock_setsockopt+0x520/0x6f0 [] dev_hard_start_xmit+0x2ef/0x650 Oops might happen because we perform parallel putting new entries in a queue (rt2x00queue_write_tx_frame()) and removing entries after finishing transmitting (rt2800usb_work_txdone()). There are cases when _txdone may process an entry that was not fully send and nullify entry->skb . To fix check in _txdone if entry has flags that indicate pending transmission and wait until flags get cleared. Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 939563162fb3..2cb25ea13c52 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -464,6 +464,15 @@ static bool rt2800usb_txdone_entry_check(struct queue_entry *entry, u32 reg) int wcid, ack, pid; int tx_wcid, tx_ack, tx_pid; + if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) || + !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) { + WARNING(entry->queue->rt2x00dev, + "Data pending for entry %u in queue %u\n", + entry->entry_idx, entry->queue->qid); + cond_resched(); + return false; + } + wcid = rt2x00_get_field32(reg, TX_STA_FIFO_WCID); ack = rt2x00_get_field32(reg, TX_STA_FIFO_TX_ACK_REQUIRED); pid = rt2x00_get_field32(reg, TX_STA_FIFO_PID_TYPE); @@ -558,8 +567,10 @@ static void rt2800usb_work_txdone(struct work_struct *work) while (!rt2x00queue_empty(queue)) { entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE); - if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) + if (test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags) || + !test_bit(ENTRY_DATA_STATUS_PENDING, &entry->flags)) break; + if (test_bit(ENTRY_DATA_IO_FAILED, &entry->flags)) rt2x00lib_txdone_noinfo(entry, TXDONE_FAILURE); else if (rt2x00queue_status_timeout(entry)) -- cgit v1.2.3 From df71c9cfceea801e7e26e2c74241758ef9c042e5 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:23 +0200 Subject: rt2x00: fix order of entry flags modification In rt2800usb_work_txdone we check flags in order: - ENTRY_OWNER_DEVICE_DATA - ENTRY_DATA_STATUS_PENDING - ENTRY_DATA_IO_FAILED Modify flags in separate order in rt2x00usb_interrupt_txdone, to avoid processing entries in _txdone with wrong flags or skip processing ready entries. Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index b6b4542c2460..7fbb55c9da82 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -262,23 +262,20 @@ static void rt2x00usb_interrupt_txdone(struct urb *urb) struct queue_entry *entry = (struct queue_entry *)urb->context; struct rt2x00_dev *rt2x00dev = entry->queue->rt2x00dev; - if (!test_and_clear_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) + if (!test_bit(ENTRY_OWNER_DEVICE_DATA, &entry->flags)) return; - - if (rt2x00dev->ops->lib->tx_dma_done) - rt2x00dev->ops->lib->tx_dma_done(entry); - - /* - * Report the frame as DMA done - */ - rt2x00lib_dmadone(entry); - /* * Check if the frame was correctly uploaded */ if (urb->status) set_bit(ENTRY_DATA_IO_FAILED, &entry->flags); + /* + * Report the frame as DMA done + */ + rt2x00lib_dmadone(entry); + if (rt2x00dev->ops->lib->tx_dma_done) + rt2x00dev->ops->lib->tx_dma_done(entry); /* * Schedule the delayed work for reading the TX status * from the device. -- cgit v1.2.3 From 674db1344443204b6ce3293f2df8fd1b7665deea Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 10 Aug 2011 15:32:24 +0200 Subject: rt2x00: fix crash in rt2800usb_get_txwi Patch should fix this oops: BUG: unable to handle kernel NULL pointer dereference at 000000a0 IP: [] rt2800usb_get_txwi+0x19/0x70 [rt2800usb] *pdpt = 0000000000000000 *pde = f000ff53f000ff53 Oops: 0000 [#1] SMP Pid: 198, comm: kworker/u:3 Tainted: G W 3.0.0-wl+ #9 LENOVO 6369CTO/6369CTO EIP: 0060:[] EFLAGS: 00010283 CPU: 1 EIP is at rt2800usb_get_txwi+0x19/0x70 [rt2800usb] EAX: 00000000 EBX: f465e140 ECX: f4494960 EDX: ef24c5f8 ESI: 810f21f5 EDI: f1da9960 EBP: f4581e80 ESP: f4581e70 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process kworker/u:3 (pid: 198, ti=f4580000 task=f4494960 task.ti=f4580000) Call Trace: [] rt2800_txdone_entry+0x2f/0xf0 [rt2800lib] [] ? warn_slowpath_common+0x7d/0xa0 [] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb] [] ? rt2800usb_work_txdone+0x288/0x360 [rt2800usb] [] rt2800usb_work_txdone+0x263/0x360 [rt2800usb] [] process_one_work+0x186/0x440 [] ? process_one_work+0x10a/0x440 [] ? rt2800usb_probe_hw+0x120/0x120 [rt2800usb] [] worker_thread+0x133/0x310 [] ? trace_hardirqs_on+0xb/0x10 [] ? manage_workers+0x1e0/0x1e0 [] kthread+0x7c/0x90 [] ? __init_kthread_worker+0x60/0x60 [] kernel_thread_helper+0x6/0x1 Oops might happen because we check rt2x00queue_empty(queue) twice, but this condition can change and we can process entry in rt2800_txdone_entry(), which was already processed by rt2800usb_txdone_entry_check() -> rt2x00lib_txdone_noinfo() and has nullify entry->skb . Reported-by: Justin Piszcz Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2800usb.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2800usb.c b/drivers/net/wireless/rt2x00/rt2800usb.c index 2cb25ea13c52..dbf501ca317f 100644 --- a/drivers/net/wireless/rt2x00/rt2800usb.c +++ b/drivers/net/wireless/rt2x00/rt2800usb.c @@ -538,12 +538,11 @@ static void rt2800usb_txdone(struct rt2x00_dev *rt2x00dev) entry = rt2x00queue_get_entry(queue, Q_INDEX_DONE); if (rt2800usb_txdone_entry_check(entry, reg)) break; + entry = NULL; } - if (!entry || rt2x00queue_empty(queue)) - break; - - rt2800_txdone_entry(entry, reg); + if (entry) + rt2800_txdone_entry(entry, reg); } } -- cgit v1.2.3 From 96242116d483cd98ab55fb989ca096f6f9cc3738 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 11 Aug 2011 12:14:05 -0600 Subject: PNP: update pnp.debug usage (needs value on command line) Commit cdefba03e44 changed pnp.debug from a boot param to a module param, which means it needs a value when used on the command line. CC: Thomas Renninger Signed-off-by: Bjorn Helgaas Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index e279b7242912..130713f71875 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2081,9 +2081,12 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 - pnp.debug [PNP] - Enable PNP debug messages. This depends on the - CONFIG_PNP_DEBUG_MESSAGES option. + pnp.debug=1 [PNP] + Enable PNP debug messages (depends on the + CONFIG_PNP_DEBUG_MESSAGES option). Change at run-time + via /sys/module/pnp/parameters/debug. We always show + current resource usage; turning this on also shows + possible settings and some assignment information. pnpacpi= [ACPI] { off } -- cgit v1.2.3 From 03ba176a29dae5b4849f45c0b5c89b9d78baa2c6 Mon Sep 17 00:00:00 2001 From: Chen Gong Date: Wed, 10 Aug 2011 10:46:22 +0800 Subject: ACPI APEI: Add Kconfig option IRQ_WORK for GHES IRQ_WORK is used by GHES, but it is selected by PERF_EVENT. For now PERF_EVENT is selected by x86 by default, but in concept, IRQ_WORK should be selected by GHES, not by others. Signed-off-by: Chen Gong Signed-off-by: Len Brown --- drivers/acpi/apei/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index c34aa51af4ee..e3f47872ec22 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -13,6 +13,7 @@ config ACPI_APEI_GHES bool "APEI Generic Hardware Error Source" depends on ACPI_APEI && X86 select ACPI_HED + select IRQ_WORK select LLIST select GENERIC_ALLOCATOR help -- cgit v1.2.3 From 8475e2336cf80ba6e7b27715b4b3214d73c211ab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 3 Aug 2011 17:22:45 +0300 Subject: Bluetooth: unlock if allocation fails in hci_blacklist_add() There was a small typo here so we never actually hit the goto which would call hci_dev_unlock_bh(). Signed-off-by: Dan Carpenter Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ec0bc3f60f2e..fca62dcd7f1b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1327,7 +1327,7 @@ int hci_blacklist_add(struct hci_dev *hdev, bdaddr_t *bdaddr) entry = kzalloc(sizeof(struct bdaddr_list), GFP_KERNEL); if (!entry) { - return -ENOMEM; + err = -ENOMEM; goto err; } -- cgit v1.2.3 From 4935f1c164ac528dff3538f97953b385ba500710 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Tue, 9 Aug 2011 17:16:28 +0200 Subject: Bluetooth: btusb: be quiet on device disconnect Disabling the bluetooth usb device embedded in (some) ThinkPads tends to lead to errors like these: btusb_bulk_complete: hci0 urb ffff88011b9bfd68 failed to resubmit (19) btusb_intr_complete: hci0 urb ffff88011b46a318 failed to resubmit (19) btusb_bulk_complete: hci0 urb ffff88011b46a000 failed to resubmit (19) That is because usb_disconnect() doesn't "quiesces" pending urbs. Disconnecting a device is a normal thing to happen so it's no big deal that usb_submit_urb() returns -ENODEV. The simplest way to get rid of these errors is to stop treating that return as an error. Trivial, actually. While we're at it, add comments to be explicit about the reasons we're not complaining about -EPERM and -ENODEV. Signed-off-by: Paul Bolle Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/btusb.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 91d13a9e8c65..9e4448efb104 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -256,7 +256,9 @@ static void btusb_intr_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); @@ -341,7 +343,9 @@ static void btusb_bulk_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); @@ -431,7 +435,9 @@ static void btusb_isoc_complete(struct urb *urb) err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { - if (err != -EPERM) + /* -EPERM: urb is being killed; + * -ENODEV: device got disconnected */ + if (err != -EPERM && err != -ENODEV) BT_ERR("%s urb %p failed to resubmit (%d)", hdev->name, urb, -err); usb_unanchor_urb(urb); -- cgit v1.2.3 From 8e7c3d2e4ba18ee4cdcc1f89aec944fbff4ce735 Mon Sep 17 00:00:00 2001 From: Ricardo Mendoza Date: Wed, 13 Jul 2011 16:04:29 +0100 Subject: Bluetooth: Add Toshiba laptops AR30XX device ID Blacklist Toshiba-branded AR3011 based AR5B195 [0930:0215] and add to ath3k.c for firmware loading. Signed-off-by: Ricardo Mendoza Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/ath3k.c | 1 + drivers/bluetooth/btusb.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index a5854735bb2e..db7cb8111fbe 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -63,6 +63,7 @@ static struct usb_device_id ath3k_table[] = { /* Atheros AR3011 with sflash firmware*/ { USB_DEVICE(0x0CF3, 0x3002) }, { USB_DEVICE(0x13d3, 0x3304) }, + { USB_DEVICE(0x0930, 0x0215) }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03F0, 0x311D) }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 9e4448efb104..3ef476070baf 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -106,6 +106,7 @@ static struct usb_device_id blacklist_table[] = { /* Atheros 3011 with sflash firmware */ { USB_DEVICE(0x0cf3, 0x3002), .driver_info = BTUSB_IGNORE }, { USB_DEVICE(0x13d3, 0x3304), .driver_info = BTUSB_IGNORE }, + { USB_DEVICE(0x0930, 0x0215), .driver_info = BTUSB_IGNORE }, /* Atheros AR9285 Malbec with sflash firmware */ { USB_DEVICE(0x03f0, 0x311d), .driver_info = BTUSB_IGNORE }, -- cgit v1.2.3 From e5842cdb0f4f2c68f6acd39e286e5d10d8c073e8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:35 -0400 Subject: Bluetooth: rfcomm: Remove unnecessary krfcommd event Removed superfluous event handling which was used to signal that the rfcomm kthread had been woken. This appears to have been used to prevent lost wakeups. Correctly ordering when the task state is set to TASK_INTERRUPTIBLE is sufficient to prevent lost wakeups. To prevent wakeups which occurred prior to initially setting TASK_INTERRUPTIBLE from being lost, the main work of the thread loop - rfcomm_process_sessions() - is performed prior to sleeping. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 5759bb7054f7..5ba3f6df665c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -62,7 +62,6 @@ static DEFINE_MUTEX(rfcomm_mutex); #define rfcomm_lock() mutex_lock(&rfcomm_mutex) #define rfcomm_unlock() mutex_unlock(&rfcomm_mutex) -static unsigned long rfcomm_event; static LIST_HEAD(session_list); @@ -120,7 +119,6 @@ static inline void rfcomm_schedule(void) { if (!rfcomm_thread) return; - set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); wake_up_process(rfcomm_thread); } @@ -2038,19 +2036,18 @@ static int rfcomm_run(void *unused) rfcomm_add_listener(BDADDR_ANY); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!test_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event)) { - /* No pending events. Let's sleep. - * Incoming connections and data will wake us up. */ - schedule(); - } - set_current_state(TASK_RUNNING); + + if (kthread_should_stop()) + break; /* Process stuff */ - clear_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); rfcomm_process_sessions(); + + schedule(); } + __set_current_state(TASK_RUNNING); rfcomm_kill_listener(); -- cgit v1.2.3 From 950e2d51e866623e4c360280aa63b85ab66d3403 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:41 -0400 Subject: Bluetooth: rfcomm: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept an rfcomm socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 8f01e6b11a70..482722bbc7a0 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -485,11 +485,6 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; @@ -501,19 +496,20 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -521,8 +517,12 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 9be4e3fbf2d3603e7a7010ede0697166738a788b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:46 -0400 Subject: Bluetooth: Fix lost wakeups waiting for sock state change Fix race conditions which can cause lost wakeups while waiting for sock state to change. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/af_bluetooth.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 8add9b499912..117e0d161780 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -494,9 +494,8 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); + set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { - set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { err = -EINPROGRESS; break; @@ -510,12 +509,13 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } -- cgit v1.2.3 From f9a3c20aa07462108fc6fd759dea956053f020bb Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:10:52 -0400 Subject: Bluetooth: l2cap: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or misssed signals) while waiting to accept an l2cap socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5c36b3e8739c..7d713b1c4cbd 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -235,30 +235,26 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl lock_sock_nested(sk, SINGLE_DEPTH_NESTING); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(nsk = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock_nested(sk, SINGLE_DEPTH_NESTING); + nsk = bt_accept_dequeue(sk, newsock); + if (nsk) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -266,8 +262,12 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 552b0d3cb9ff648aa503011ef50ca24019cd0f5f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:01 -0400 Subject: Bluetooth: sco: Fix lost wakeups waiting to accept socket Fix race conditions which can cause lost wakeups (or missed signals) while waiting to accept a sco socket connection. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/sco.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 4c3621b5e0aa..8270f05e3f1f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -564,30 +564,26 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag lock_sock(sk); - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; - goto done; - } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); - while (!(ch = bt_accept_dequeue(sk, newsock))) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (!timeo) { - err = -EAGAIN; + + if (sk->sk_state != BT_LISTEN) { + err = -EBADFD; break; } - release_sock(sk); - timeo = schedule_timeout(timeo); - lock_sock(sk); + ch = bt_accept_dequeue(sk, newsock); + if (ch) + break; - if (sk->sk_state != BT_LISTEN) { - err = -EBADFD; + if (!timeo) { + err = -EAGAIN; break; } @@ -595,8 +591,12 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag err = sock_intr_errno(timeo); break; } + + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); if (err) -- cgit v1.2.3 From 38d57555616afcdad7381b02b523d494327494cd Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:07 -0400 Subject: Bluetooth: bnep: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index ca39fcf010ce..7e8ff3c24942 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -484,9 +484,11 @@ static int bnep_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { skb_orphan(skb); @@ -504,7 +506,7 @@ static int bnep_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); /* Cleanup session */ -- cgit v1.2.3 From 3a3f5c7df55a1294c9e6e2d0b8cea604b137438f Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 24 Jul 2011 00:11:10 -0400 Subject: Bluetooth: cmtp: Fix lost wakeup of session thread Fix race condition which can result in missing the wakeup intended to stop the session thread. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index c5b11af908be..2eb854ab10f6 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -292,9 +292,11 @@ static int cmtp_session(void *arg) init_waitqueue_entry(&wait, current); add_wait_queue(sk_sleep(sk), &wait); - while (!kthread_should_stop()) { + while (1) { set_current_state(TASK_INTERRUPTIBLE); + if (kthread_should_stop()) + break; if (sk->sk_state != BT_CONNECTED) break; @@ -307,7 +309,7 @@ static int cmtp_session(void *arg) schedule(); } - set_current_state(TASK_RUNNING); + __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); down_write(&cmtp_session_sem); -- cgit v1.2.3 From a71a0cf4e9cdb1c43843977a1efc43f96f6efc21 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Mon, 25 Jul 2011 18:36:26 -0400 Subject: Bluetooth: l2cap: Fix lost wakeup waiting for ERTM acks Fix race condition which can result in missing wakeup during l2cap socket shutdown. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 3204ba8a701c..b3bdb482bbe6 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1159,9 +1159,8 @@ int __l2cap_wait_ack(struct sock *sk) int timeo = HZ/5; add_wait_queue(sk_sleep(sk), &wait); - while ((chan->unacked_frames > 0 && chan->conn)) { - set_current_state(TASK_INTERRUPTIBLE); - + set_current_state(TASK_INTERRUPTIBLE); + while (chan->unacked_frames > 0 && chan->conn) { if (!timeo) timeo = HZ/5; @@ -1173,6 +1172,7 @@ int __l2cap_wait_ack(struct sock *sk) release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); + set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) -- cgit v1.2.3 From 6be6b11f006840ba7d8d4b959b3fa0c522f8468a Mon Sep 17 00:00:00 2001 From: Chen Ganir Date: Thu, 28 Jul 2011 15:42:09 +0300 Subject: Bluetooth: Fixed wrong L2CAP Sock timer value L2CAP connection timeout needs to be assigned as miliseconds and not as jiffies. Signed-off-by: Chen Ganir Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_sock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 7d713b1c4cbd..61f1f623091d 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -993,7 +993,7 @@ static struct sock *l2cap_sock_alloc(struct net *net, struct socket *sock, int p INIT_LIST_HEAD(&bt_sk(sk)->accept_q); sk->sk_destruct = l2cap_sock_destruct; - sk->sk_sndtimeo = msecs_to_jiffies(L2CAP_CONN_TIMEOUT); + sk->sk_sndtimeo = L2CAP_CONN_TIMEOUT; sock_reset_flag(sk, SOCK_ZAPPED); -- cgit v1.2.3 From 7bdb8a5cf17f66614a9897645efcd4ccc27535ee Mon Sep 17 00:00:00 2001 From: Szymon Janc Date: Tue, 26 Jul 2011 22:46:54 +0200 Subject: Bluetooth: Don't use cmd_timer to timeout HCI reset command No command should be send before Command Complete event for HCI reset is received. This fix regression introduced by commit 6bd32326cda(Bluetooth: Use proper timer for hci command timout) for chips whose reset command takes longer to complete (e.g. CSR) resulting in next command being send before HCI reset completed. Signed-off-by: Szymon Janc Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index fca62dcd7f1b..56943add45cc 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1209,7 +1209,6 @@ static void hci_cmd_timer(unsigned long arg) BT_ERR("%s command tx timeout", hdev->name); atomic_set(&hdev->cmd_cnt, 1); - clear_bit(HCI_RESET, &hdev->flags); tasklet_schedule(&hdev->cmd_task); } @@ -2408,7 +2407,10 @@ static void hci_cmd_task(unsigned long arg) if (hdev->sent_cmd) { atomic_dec(&hdev->cmd_cnt); hci_send_frame(skb); - mod_timer(&hdev->cmd_timer, + if (test_bit(HCI_RESET, &hdev->flags)) + del_timer(&hdev->cmd_timer); + else + mod_timer(&hdev->cmd_timer, jiffies + msecs_to_jiffies(HCI_CMD_TIMEOUT)); } else { skb_queue_head(&hdev->cmd_q, skb); -- cgit v1.2.3 From 751c10a56802513a6b057c8cf1552cecc1c9afde Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:41:35 -0400 Subject: Bluetooth: bnep: Fix deadlock in session deletion Commit f4d7cd4a4c introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by bnep_del_connection() which is waiting for the thread to exit -- deadlock. Use atomic_t/wake_up_process instead to signal to the thread to exit. Signed-off-by: Jaikumar Ganesh Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/bnep/bnep.h | 1 + net/bluetooth/bnep/core.c | 9 +++++---- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/bluetooth/bnep/bnep.h b/net/bluetooth/bnep/bnep.h index 8e6c06158f8e..e7ee5314f39a 100644 --- a/net/bluetooth/bnep/bnep.h +++ b/net/bluetooth/bnep/bnep.h @@ -155,6 +155,7 @@ struct bnep_session { unsigned int role; unsigned long state; unsigned long flags; + atomic_t terminate; struct task_struct *task; struct ethhdr eh; diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c index 7e8ff3c24942..d9edfe8bf9d6 100644 --- a/net/bluetooth/bnep/core.c +++ b/net/bluetooth/bnep/core.c @@ -487,7 +487,7 @@ static int bnep_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&s->terminate)) break; /* RX */ while ((skb = skb_dequeue(&sk->sk_receive_queue))) { @@ -642,9 +642,10 @@ int bnep_del_connection(struct bnep_conndel_req *req) down_read(&bnep_session_sem); s = __bnep_get_session(req->dst); - if (s) - kthread_stop(s->task); - else + if (s) { + atomic_inc(&s->terminate); + wake_up_process(s->task); + } else err = -ENOENT; up_read(&bnep_session_sem); -- cgit v1.2.3 From 7176522cdca1f0b78a1434b41761f0334511822a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:44:21 -0400 Subject: Bluetooth: cmtp: Fix deadlock in session deletion Commit fada4ac339 introduced the usage of kthread API. kthread_stop is a blocking function which returns only when the thread exits. In this case, the thread can't exit because it's waiting for the write lock, which is being held by cmtp_del_connection() which is waiting for the thread to exit -- deadlock. Revert cmtp_reset_ctr to its original behavior: non-blocking signalling for the session to terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/capi.c | 3 ++- net/bluetooth/cmtp/cmtp.h | 1 + net/bluetooth/cmtp/core.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 040f67b12978..50f0d135eb8f 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -386,7 +386,8 @@ static void cmtp_reset_ctr(struct capi_ctr *ctrl) capi_ctr_down(ctrl); - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } static void cmtp_register_appl(struct capi_ctr *ctrl, __u16 appl, capi_register_params *rp) diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h index db43b54ac9af..c32638dddbf9 100644 --- a/net/bluetooth/cmtp/cmtp.h +++ b/net/bluetooth/cmtp/cmtp.h @@ -81,6 +81,7 @@ struct cmtp_session { char name[BTNAMSIZ]; + atomic_t terminate; struct task_struct *task; wait_queue_head_t wait; diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 2eb854ab10f6..42cb2f4c0db1 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -295,7 +295,7 @@ static int cmtp_session(void *arg) while (1) { set_current_state(TASK_INTERRUPTIBLE); - if (kthread_should_stop()) + if (atomic_read(&session->terminate)) break; if (sk->sk_state != BT_CONNECTED) break; @@ -416,7 +416,8 @@ int cmtp_del_connection(struct cmtp_conndel_req *req) skb_queue_purge(&session->transmit); /* Stop session thread */ - kthread_stop(session->task); + atomic_inc(&session->terminate); + wake_up_process(session->task); } else err = -ENOENT; -- cgit v1.2.3 From e9d5cb541b22aa651edc29990092ec5f8174cd39 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:26 -0400 Subject: Bluetooth: hidp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 43b4c2deb7cc..7e19a012970e 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -1044,8 +1044,12 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, } err = hid_add_device(session->hid); - if (err < 0) - goto err_add_device; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&hidp_session_sem); + return err; + } if (session->input) { hidp_send_ctrl_message(session, @@ -1059,12 +1063,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, up_write(&hidp_session_sem); return 0; -err_add_device: - hid_destroy_device(session->hid); - session->hid = NULL; - atomic_inc(&session->terminate); - wake_up_process(session->task); - unlink: hidp_del_timer(session); -- cgit v1.2.3 From 1c97e94c0b7c56319754ee6f9ccd2e93fe1ee2b3 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:34 -0400 Subject: Bluetooth: hidp: Fix memory leak of cached report descriptor Free the cached HID report descriptor on thread terminate. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 7e19a012970e..26f0d109ff41 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -764,6 +764,7 @@ static int hidp_session(void *arg) up_write(&hidp_session_sem); + kfree(session->rd_data); kfree(session); return 0; } -- cgit v1.2.3 From 615aedd6e5add8104f031b0d547285652d04d330 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:51:50 -0400 Subject: Bluetooth: hidp: Only free input device if failed register When an hidp connection is added for a boot protocol input device, only free the allocated device if device registration fails. Subsequent failures should only unregister the device (the input device api documents that unregister will also free the allocated device). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 26f0d109ff41..a859f9078df6 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -842,6 +842,8 @@ static int hidp_setup_input(struct hidp_session *session, err = input_register_device(input); if (err < 0) { + input_free_device(input); + session->input = NULL; hci_conn_put_device(session->conn); return err; } @@ -1089,7 +1091,6 @@ purge: failed: up_write(&hidp_session_sem); - input_free_device(session->input); kfree(session); return err; } -- cgit v1.2.3 From ff062ea109217329b88693bc9081da893eb8b71b Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:52:01 -0400 Subject: Bluetooth: hidp: Don't release device ref if never held When an hidp connection is added for a boot protocol input device, don't release a device reference that was never acquired. The device reference is acquired when the session is linked to the session list (which hasn't happened yet when hidp_setup_input is called). Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hidp/core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index a859f9078df6..fb68f344c34a 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -844,7 +844,6 @@ static int hidp_setup_input(struct hidp_session *session, if (err < 0) { input_free_device(input); session->input = NULL; - hci_conn_put_device(session->conn); return err; } -- cgit v1.2.3 From 687beaa0d1d937c327e2f97b4b4fa6c23ca70624 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Fri, 5 Aug 2011 10:53:52 -0400 Subject: Bluetooth: cmtp: Fix session cleanup on failed conn add Once the session thread is running, cleanup must be handled by the session thread only. Signed-off-by: Peter Hurley Signed-off-by: Gustavo F. Padovan --- net/bluetooth/cmtp/core.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 42cb2f4c0db1..521baa4fe835 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -382,16 +382,17 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock) if (!(session->flags & (1 << CMTP_LOOPBACK))) { err = cmtp_attach_device(session); - if (err < 0) - goto detach; + if (err < 0) { + atomic_inc(&session->terminate); + wake_up_process(session->task); + up_write(&cmtp_session_sem); + return err; + } } up_write(&cmtp_session_sem); return 0; -detach: - cmtp_detach_device(session); - unlink: __cmtp_unlink_session(session); -- cgit v1.2.3 From f09aecd50f39d35372e551491d9f36ff0f51ee4d Mon Sep 17 00:00:00 2001 From: Sangbeom Kim Date: Wed, 20 Jul 2011 17:07:13 +0900 Subject: ASoC: SAMSUNG: Add I2S0 internal dma driver I2S in Exynos4 and S5PC110(S5PV210) has a internal dma. It can be used low power audio mode and 2nd channel transfer. This patch can support idma. [Reapplied after dependencies propagated through in 3.1-rc1. --broonie] Signed-off-by: Sangbeom Kim Acked-by: Jassi Brar Acked-by: Liam Girdwood Acked-by: Jassi Brar Signed-off-by: Mark Brown --- sound/soc/samsung/Makefile | 2 + sound/soc/samsung/idma.c | 453 +++++++++++++++++++++++++++++++++++++++++++++ sound/soc/samsung/idma.h | 26 +++ 3 files changed, 481 insertions(+) create mode 100644 sound/soc/samsung/idma.c create mode 100644 sound/soc/samsung/idma.h diff --git a/sound/soc/samsung/Makefile b/sound/soc/samsung/Makefile index 9eb3b12eb72f..8509d3c4366e 100644 --- a/sound/soc/samsung/Makefile +++ b/sound/soc/samsung/Makefile @@ -1,5 +1,6 @@ # S3c24XX Platform Support snd-soc-s3c24xx-objs := dma.o +snd-soc-idma-objs := idma.o snd-soc-s3c24xx-i2s-objs := s3c24xx-i2s.o snd-soc-s3c2412-i2s-objs := s3c2412-i2s.o snd-soc-ac97-objs := ac97.o @@ -16,6 +17,7 @@ obj-$(CONFIG_SND_S3C_I2SV2_SOC) += snd-soc-s3c-i2s-v2.o obj-$(CONFIG_SND_SAMSUNG_SPDIF) += snd-soc-samsung-spdif.o obj-$(CONFIG_SND_SAMSUNG_PCM) += snd-soc-pcm.o obj-$(CONFIG_SND_SAMSUNG_I2S) += snd-soc-i2s.o +obj-$(CONFIG_SND_SAMSUNG_I2S) += snd-soc-idma.o # S3C24XX Machine Support snd-soc-jive-wm8750-objs := jive_wm8750.o diff --git a/sound/soc/samsung/idma.c b/sound/soc/samsung/idma.c new file mode 100644 index 000000000000..ebde0740ab19 --- /dev/null +++ b/sound/soc/samsung/idma.c @@ -0,0 +1,453 @@ +/* + * sound/soc/samsung/idma.c + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd. + * http://www.samsung.com + * + * I2S0's Internal DMA driver + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "i2s.h" +#include "idma.h" +#include "dma.h" +#include "i2s-regs.h" + +#define ST_RUNNING (1<<0) +#define ST_OPENED (1<<1) + +static const struct snd_pcm_hardware idma_hardware = { + .info = SNDRV_PCM_INFO_INTERLEAVED | + SNDRV_PCM_INFO_BLOCK_TRANSFER | + SNDRV_PCM_INFO_MMAP | + SNDRV_PCM_INFO_MMAP_VALID | + SNDRV_PCM_INFO_PAUSE | + SNDRV_PCM_INFO_RESUME, + .formats = SNDRV_PCM_FMTBIT_S16_LE | + SNDRV_PCM_FMTBIT_U16_LE | + SNDRV_PCM_FMTBIT_S24_LE | + SNDRV_PCM_FMTBIT_U24_LE | + SNDRV_PCM_FMTBIT_U8 | + SNDRV_PCM_FMTBIT_S8, + .channels_min = 2, + .channels_max = 2, + .buffer_bytes_max = MAX_IDMA_BUFFER, + .period_bytes_min = 128, + .period_bytes_max = MAX_IDMA_PERIOD, + .periods_min = 1, + .periods_max = 2, +}; + +struct idma_ctrl { + spinlock_t lock; + int state; + dma_addr_t start; + dma_addr_t pos; + dma_addr_t end; + dma_addr_t period; + dma_addr_t periodsz; + void *token; + void (*cb)(void *dt, int bytes_xfer); +}; + +static struct idma_info { + spinlock_t lock; + void __iomem *regs; + dma_addr_t lp_tx_addr; +} idma; + +static void idma_getpos(dma_addr_t *src) +{ + *src = idma.lp_tx_addr + + (readl(idma.regs + I2STRNCNT) & 0xffffff) * 4; +} + +static int idma_enqueue(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct idma_ctrl *prtd = substream->runtime->private_data; + u32 val; + + spin_lock(&prtd->lock); + prtd->token = (void *) substream; + spin_unlock(&prtd->lock); + + /* Internal DMA Level0 Interrupt Address */ + val = idma.lp_tx_addr + prtd->periodsz; + writel(val, idma.regs + I2SLVL0ADDR); + + /* Start address0 of I2S internal DMA operation. */ + val = idma.lp_tx_addr; + writel(val, idma.regs + I2SSTR0); + + /* + * Transfer block size for I2S internal DMA. + * Should decide transfer size before start dma operation + */ + val = readl(idma.regs + I2SSIZE); + val &= ~(I2SSIZE_TRNMSK << I2SSIZE_SHIFT); + val |= (((runtime->dma_bytes >> 2) & + I2SSIZE_TRNMSK) << I2SSIZE_SHIFT); + writel(val, idma.regs + I2SSIZE); + + val = readl(idma.regs + I2SAHB); + val |= AHB_INTENLVL0; + writel(val, idma.regs + I2SAHB); + + return 0; +} + +static void idma_setcallbk(struct snd_pcm_substream *substream, + void (*cb)(void *, int)) +{ + struct idma_ctrl *prtd = substream->runtime->private_data; + + spin_lock(&prtd->lock); + prtd->cb = cb; + spin_unlock(&prtd->lock); +} + +static void idma_control(int op) +{ + u32 val = readl(idma.regs + I2SAHB); + + spin_lock(&idma.lock); + + switch (op) { + case LPAM_DMA_START: + val |= (AHB_INTENLVL0 | AHB_DMAEN); + break; + case LPAM_DMA_STOP: + val &= ~(AHB_INTENLVL0 | AHB_DMAEN); + break; + default: + spin_unlock(&idma.lock); + return; + } + + writel(val, idma.regs + I2SAHB); + spin_unlock(&idma.lock); +} + +static void idma_done(void *id, int bytes_xfer) +{ + struct snd_pcm_substream *substream = id; + struct idma_ctrl *prtd = substream->runtime->private_data; + + if (prtd && (prtd->state & ST_RUNNING)) + snd_pcm_period_elapsed(substream); +} + +static int idma_hw_params(struct snd_pcm_substream *substream, + struct snd_pcm_hw_params *params) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct idma_ctrl *prtd = substream->runtime->private_data; + u32 mod = readl(idma.regs + I2SMOD); + u32 ahb = readl(idma.regs + I2SAHB); + + ahb |= (AHB_DMARLD | AHB_INTMASK); + mod |= MOD_TXS_IDMA; + writel(ahb, idma.regs + I2SAHB); + writel(mod, idma.regs + I2SMOD); + + snd_pcm_set_runtime_buffer(substream, &substream->dma_buffer); + runtime->dma_bytes = params_buffer_bytes(params); + + prtd->start = prtd->pos = runtime->dma_addr; + prtd->period = params_periods(params); + prtd->periodsz = params_period_bytes(params); + prtd->end = runtime->dma_addr + runtime->dma_bytes; + + idma_setcallbk(substream, idma_done); + + return 0; +} + +static int idma_hw_free(struct snd_pcm_substream *substream) +{ + snd_pcm_set_runtime_buffer(substream, NULL); + + return 0; +} + +static int idma_prepare(struct snd_pcm_substream *substream) +{ + struct idma_ctrl *prtd = substream->runtime->private_data; + + prtd->pos = prtd->start; + + /* flush the DMA channel */ + idma_control(LPAM_DMA_STOP); + idma_enqueue(substream); + + return 0; +} + +static int idma_trigger(struct snd_pcm_substream *substream, int cmd) +{ + struct idma_ctrl *prtd = substream->runtime->private_data; + int ret = 0; + + spin_lock(&prtd->lock); + + switch (cmd) { + case SNDRV_PCM_TRIGGER_RESUME: + case SNDRV_PCM_TRIGGER_START: + case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: + prtd->state |= ST_RUNNING; + idma_control(LPAM_DMA_START); + break; + + case SNDRV_PCM_TRIGGER_SUSPEND: + case SNDRV_PCM_TRIGGER_STOP: + case SNDRV_PCM_TRIGGER_PAUSE_PUSH: + prtd->state &= ~ST_RUNNING; + idma_control(LPAM_DMA_STOP); + break; + + default: + ret = -EINVAL; + break; + } + + spin_unlock(&prtd->lock); + + return ret; +} + +static snd_pcm_uframes_t + idma_pointer(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct idma_ctrl *prtd = runtime->private_data; + dma_addr_t src; + unsigned long res; + + spin_lock(&prtd->lock); + + idma_getpos(&src); + res = src - prtd->start; + + spin_unlock(&prtd->lock); + + return bytes_to_frames(substream->runtime, res); +} + +static int idma_mmap(struct snd_pcm_substream *substream, + struct vm_area_struct *vma) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + unsigned long size, offset; + int ret; + + /* From snd_pcm_lib_mmap_iomem */ + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_IO; + size = vma->vm_end - vma->vm_start; + offset = vma->vm_pgoff << PAGE_SHIFT; + ret = io_remap_pfn_range(vma, vma->vm_start, + (runtime->dma_addr + offset) >> PAGE_SHIFT, + size, vma->vm_page_prot); + + return ret; +} + +static irqreturn_t iis_irq(int irqno, void *dev_id) +{ + struct idma_ctrl *prtd = (struct idma_ctrl *)dev_id; + u32 iiscon, iisahb, val, addr; + + iisahb = readl(idma.regs + I2SAHB); + iiscon = readl(idma.regs + I2SCON); + + val = (iisahb & AHB_LVL0INT) ? AHB_CLRLVL0INT : 0; + + if (val) { + iisahb |= val; + writel(iisahb, idma.regs + I2SAHB); + + addr = readl(idma.regs + I2SLVL0ADDR) - idma.lp_tx_addr; + addr += prtd->periodsz; + addr %= (prtd->end - prtd->start); + addr += idma.lp_tx_addr; + + writel(addr, idma.regs + I2SLVL0ADDR); + + if (prtd->cb) + prtd->cb(prtd->token, prtd->period); + } + + return IRQ_HANDLED; +} + +static int idma_open(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct idma_ctrl *prtd; + int ret; + + snd_soc_set_runtime_hwparams(substream, &idma_hardware); + + prtd = kzalloc(sizeof(struct idma_ctrl), GFP_KERNEL); + if (prtd == NULL) + return -ENOMEM; + + ret = request_irq(IRQ_I2S0, iis_irq, 0, "i2s", prtd); + if (ret < 0) { + pr_err("fail to claim i2s irq , ret = %d\n", ret); + kfree(prtd); + return ret; + } + + spin_lock_init(&prtd->lock); + + runtime->private_data = prtd; + + return 0; +} + +static int idma_close(struct snd_pcm_substream *substream) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct idma_ctrl *prtd = runtime->private_data; + + free_irq(IRQ_I2S0, prtd); + + if (!prtd) + pr_err("idma_close called with prtd == NULL\n"); + + kfree(prtd); + + return 0; +} + +static struct snd_pcm_ops idma_ops = { + .open = idma_open, + .close = idma_close, + .ioctl = snd_pcm_lib_ioctl, + .trigger = idma_trigger, + .pointer = idma_pointer, + .mmap = idma_mmap, + .hw_params = idma_hw_params, + .hw_free = idma_hw_free, + .prepare = idma_prepare, +}; + +static void idma_free(struct snd_pcm *pcm) +{ + struct snd_pcm_substream *substream; + struct snd_dma_buffer *buf; + + substream = pcm->streams[SNDRV_PCM_STREAM_PLAYBACK].substream; + if (!substream) + return; + + buf = &substream->dma_buffer; + if (!buf->area) + return; + + iounmap(buf->area); + + buf->area = NULL; + buf->addr = 0; +} + +static int preallocate_idma_buffer(struct snd_pcm *pcm, int stream) +{ + struct snd_pcm_substream *substream = pcm->streams[stream].substream; + struct snd_dma_buffer *buf = &substream->dma_buffer; + + buf->dev.dev = pcm->card->dev; + buf->private_data = NULL; + + /* Assign PCM buffer pointers */ + buf->dev.type = SNDRV_DMA_TYPE_CONTINUOUS; + buf->addr = idma.lp_tx_addr; + buf->bytes = idma_hardware.buffer_bytes_max; + buf->area = (unsigned char *)ioremap(buf->addr, buf->bytes); + + return 0; +} + +static u64 idma_mask = DMA_BIT_MASK(32); + +static int idma_new(struct snd_soc_pcm_runtime *rtd) +{ + struct snd_card *card = rtd->card->snd_card; + struct snd_soc_dai *dai = rtd->cpu_dai; + struct snd_pcm *pcm = rtd->pcm; + int ret = 0; + + if (!card->dev->dma_mask) + card->dev->dma_mask = &idma_mask; + if (!card->dev->coherent_dma_mask) + card->dev->coherent_dma_mask = DMA_BIT_MASK(32); + + if (dai->driver->playback.channels_min) + ret = preallocate_idma_buffer(pcm, + SNDRV_PCM_STREAM_PLAYBACK); + + return ret; +} + +void idma_reg_addr_init(void *regs, dma_addr_t addr) +{ + spin_lock_init(&idma.lock); + idma.regs = regs; + idma.lp_tx_addr = addr; +} + +struct snd_soc_platform_driver asoc_idma_platform = { + .ops = &idma_ops, + .pcm_new = idma_new, + .pcm_free = idma_free, +}; + +static int __devinit asoc_idma_platform_probe(struct platform_device *pdev) +{ + return snd_soc_register_platform(&pdev->dev, &asoc_idma_platform); +} + +static int __devexit asoc_idma_platform_remove(struct platform_device *pdev) +{ + snd_soc_unregister_platform(&pdev->dev); + return 0; +} + +static struct platform_driver asoc_idma_driver = { + .driver = { + .name = "samsung-idma", + .owner = THIS_MODULE, + }, + + .probe = asoc_idma_platform_probe, + .remove = __devexit_p(asoc_idma_platform_remove), +}; + +static int __init asoc_idma_init(void) +{ + return platform_driver_register(&asoc_idma_driver); +} +module_init(asoc_idma_init); + +static void __exit asoc_idma_exit(void) +{ + platform_driver_unregister(&asoc_idma_driver); +} +module_exit(asoc_idma_exit); + +MODULE_AUTHOR("Jaswinder Singh, "); +MODULE_DESCRIPTION("Samsung ASoC IDMA Driver"); +MODULE_LICENSE("GPL"); diff --git a/sound/soc/samsung/idma.h b/sound/soc/samsung/idma.h new file mode 100644 index 000000000000..48273216166e --- /dev/null +++ b/sound/soc/samsung/idma.h @@ -0,0 +1,26 @@ +/* + * sound/soc/samsung/idma.h + * + * Copyright (c) 2011 Samsung Electronics Co., Ltd + * http://www.samsung.com + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __SND_SOC_SAMSUNG_IDMA_H_ +#define __SND_SOC_SAMSUNG_IDMA_H_ + +extern void idma_reg_addr_init(void *regs, dma_addr_t addr); + +/* dma_state */ +#define LPAM_DMA_STOP 0 +#define LPAM_DMA_START 1 + +#define MAX_IDMA_PERIOD (128 * 1024) +#define MAX_IDMA_BUFFER (160 * 1024) + +#endif /* __SND_SOC_SAMSUNG_IDMA_H_ */ -- cgit v1.2.3 From b33f9cbd67ba1a1c46879ec66467269f09cde8e5 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Thu, 11 Aug 2011 11:59:10 -0600 Subject: regmap: Specify a module license CONFIG_REGMAP_I2C/SPI are set to m when selected by a tristate config option that's set to m. The regmap modules don't specify a license, so fail to link to regmap_init at load time, since that is EXPORT_SYMBOL_GPL. Fix this by specifying a license for the regmap modules. Signed-off-by: Stephen Warren Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-i2c.c | 1 + drivers/base/regmap/regmap-spi.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c index c2231ff06cbc..c4f7a45cd2c3 100644 --- a/drivers/base/regmap/regmap-i2c.c +++ b/drivers/base/regmap/regmap-i2c.c @@ -113,3 +113,4 @@ struct regmap *regmap_init_i2c(struct i2c_client *i2c, } EXPORT_SYMBOL_GPL(regmap_init_i2c); +MODULE_LICENSE("GPL"); diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index 4deba0621bc7..2bbc65999a5f 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c @@ -70,3 +70,5 @@ struct regmap *regmap_init_spi(struct spi_device *spi, return regmap_init(&spi->dev, ®map_spi, config); } EXPORT_SYMBOL_GPL(regmap_init_spi); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7ec41ee5ad5f716f67041c0d49014d0becb5332c Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 11 Aug 2011 15:44:57 +0300 Subject: ASoC: omap: Update e-mail address of Jarkko Nikula My gmail account got disabled and I'm not going to reopen it. Signed-off-by: Jarkko Nikula Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- MAINTAINERS | 2 +- include/sound/tlv320aic3x.h | 2 +- sound/soc/omap/n810.c | 4 ++-- sound/soc/omap/omap-mcbsp.c | 4 ++-- sound/soc/omap/omap-mcbsp.h | 2 +- sound/soc/omap/omap-pcm.c | 4 ++-- sound/soc/omap/omap-pcm.h | 2 +- sound/soc/omap/rx51.c | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 51d42fbc8dc4..46e3e6b99220 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4604,7 +4604,7 @@ F: arch/arm/mach-omap2/clockdomain2xxx_3xxx.c F: arch/arm/mach-omap2/clockdomain44xx.c OMAP AUDIO SUPPORT -M: Jarkko Nikula +M: Jarkko Nikula L: alsa-devel@alsa-project.org (subscribers-only) L: linux-omap@vger.kernel.org S: Maintained diff --git a/include/sound/tlv320aic3x.h b/include/sound/tlv320aic3x.h index 99e0308bf2c2..ffd9bc793105 100644 --- a/include/sound/tlv320aic3x.h +++ b/include/sound/tlv320aic3x.h @@ -1,7 +1,7 @@ /* * Platform data for Texas Instruments TLV320AIC3x codec * - * Author: Jarkko Nikula + * Author: Jarkko Nikula * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/sound/soc/omap/n810.c b/sound/soc/omap/n810.c index 83d213bfd3d1..62e292f49313 100644 --- a/sound/soc/omap/n810.c +++ b/sound/soc/omap/n810.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Nokia Corporation * - * Contact: Jarkko Nikula + * Contact: Jarkko Nikula * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -402,6 +402,6 @@ static void __exit n810_soc_exit(void) module_init(n810_soc_init); module_exit(n810_soc_exit); -MODULE_AUTHOR("Jarkko Nikula "); +MODULE_AUTHOR("Jarkko Nikula "); MODULE_DESCRIPTION("ALSA SoC Nokia N810"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/omap/omap-mcbsp.c b/sound/soc/omap/omap-mcbsp.c index 07b772357244..ebcc2d4d2b18 100644 --- a/sound/soc/omap/omap-mcbsp.c +++ b/sound/soc/omap/omap-mcbsp.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Nokia Corporation * - * Contact: Jarkko Nikula + * Contact: Jarkko Nikula * Peter Ujfalusi * * This program is free software; you can redistribute it and/or @@ -780,6 +780,6 @@ static void __exit snd_omap_mcbsp_exit(void) } module_exit(snd_omap_mcbsp_exit); -MODULE_AUTHOR("Jarkko Nikula "); +MODULE_AUTHOR("Jarkko Nikula "); MODULE_DESCRIPTION("OMAP I2S SoC Interface"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/omap/omap-mcbsp.h b/sound/soc/omap/omap-mcbsp.h index 9a7dedd6f5a9..65cde9d3807b 100644 --- a/sound/soc/omap/omap-mcbsp.h +++ b/sound/soc/omap/omap-mcbsp.h @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Nokia Corporation * - * Contact: Jarkko Nikula + * Contact: Jarkko Nikula * Peter Ujfalusi * * This program is free software; you can redistribute it and/or diff --git a/sound/soc/omap/omap-pcm.c b/sound/soc/omap/omap-pcm.c index b2f5751edae3..9b5c88ac35b9 100644 --- a/sound/soc/omap/omap-pcm.c +++ b/sound/soc/omap/omap-pcm.c @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Nokia Corporation * - * Contact: Jarkko Nikula + * Contact: Jarkko Nikula * Peter Ujfalusi * * This program is free software; you can redistribute it and/or @@ -436,6 +436,6 @@ static void __exit snd_omap_pcm_exit(void) } module_exit(snd_omap_pcm_exit); -MODULE_AUTHOR("Jarkko Nikula "); +MODULE_AUTHOR("Jarkko Nikula "); MODULE_DESCRIPTION("OMAP PCM DMA module"); MODULE_LICENSE("GPL"); diff --git a/sound/soc/omap/omap-pcm.h b/sound/soc/omap/omap-pcm.h index a0ed1dbb52d6..f95fe3064172 100644 --- a/sound/soc/omap/omap-pcm.h +++ b/sound/soc/omap/omap-pcm.h @@ -3,7 +3,7 @@ * * Copyright (C) 2008 Nokia Corporation * - * Contact: Jarkko Nikula + * Contact: Jarkko Nikula * Peter Ujfalusi * * This program is free software; you can redistribute it and/or diff --git a/sound/soc/omap/rx51.c b/sound/soc/omap/rx51.c index 0aae998b6540..893300a53bab 100644 --- a/sound/soc/omap/rx51.c +++ b/sound/soc/omap/rx51.c @@ -5,7 +5,7 @@ * * Contact: Peter Ujfalusi * Eduardo Valentin - * Jarkko Nikula + * Jarkko Nikula * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License -- cgit v1.2.3 From 567b20e02bc1b2bcd69e8bfc022dec3da3fefb89 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 12 Jul 2011 19:05:29 +0800 Subject: usb: gadget: s3c2410_udc: fix unterminated platform_device_id table platform_device_id structures need a NULL terminating entry, add it. Signed-off-by: Axel Lin Signed-off-by: Felipe Balbi --- drivers/usb/gadget/s3c2410_udc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/s3c2410_udc.c b/drivers/usb/gadget/s3c2410_udc.c index 85c1b0d66293..8d31848aab09 100644 --- a/drivers/usb/gadget/s3c2410_udc.c +++ b/drivers/usb/gadget/s3c2410_udc.c @@ -2060,6 +2060,7 @@ static int s3c2410_udc_resume(struct platform_device *pdev) static const struct platform_device_id s3c_udc_ids[] = { { "s3c2410-usbgadget", }, { "s3c2440-usbgadget", }, + { } }; MODULE_DEVICE_TABLE(platform, s3c_udc_ids); -- cgit v1.2.3 From aba1350fdac7cb52f86e6818addb26d03b3ef9bc Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Jul 2011 21:47:01 +0200 Subject: usb: gadget: fusb300: remove #if 0 block The code in this block is unused and the Author is fine with removing: | These functions were used to debug unstable hw fifo while developing | fusb300. It's much more stable now. | So these functions can be removed. Cc: "Wendy Yuan-Hsin Chen" Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/fusb300_udc.c | 101 --------------------------------------- 1 file changed, 101 deletions(-) diff --git a/drivers/usb/gadget/fusb300_udc.c b/drivers/usb/gadget/fusb300_udc.c index 24a924330c81..4ec888f90002 100644 --- a/drivers/usb/gadget/fusb300_udc.c +++ b/drivers/usb/gadget/fusb300_udc.c @@ -609,107 +609,6 @@ void fusb300_rdcxf(struct fusb300 *fusb300, } } -#if 0 -static void fusb300_dbg_fifo(struct fusb300_ep *ep, - u8 entry, u16 length) -{ - u32 reg; - u32 i = 0; - u32 j = 0; - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM); - reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) | - FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG); - reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) | - FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG); - iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM); - - for (i = 0; i < (length >> 2); i++) { - if (i * 4 == 1024) - break; - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i * 4); - printk(KERN_DEBUG" 0x%-8x", reg); - j++; - if ((j % 4) == 0) - printk(KERN_DEBUG "\n"); - } - - if (length % 4) { - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i * 4); - printk(KERN_DEBUG " 0x%x\n", reg); - } - - if ((j % 4) != 0) - printk(KERN_DEBUG "\n"); - - fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM, - FUSB300_GTM_TST_FIFO_DEG); -} - -static void fusb300_cmp_dbg_fifo(struct fusb300_ep *ep, - u8 entry, u16 length, u8 *golden) -{ - u32 reg; - u32 i = 0; - u32 golden_value; - u8 *tmp; - - tmp = golden; - - printk(KERN_DEBUG "fusb300_cmp_dbg_fifo (entry %d) : start\n", entry); - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_GTM); - reg &= ~(FUSB300_GTM_TST_EP_ENTRY(0xF) | - FUSB300_GTM_TST_EP_NUM(0xF) | FUSB300_GTM_TST_FIFO_DEG); - reg |= (FUSB300_GTM_TST_EP_ENTRY(entry) | - FUSB300_GTM_TST_EP_NUM(ep->epnum) | FUSB300_GTM_TST_FIFO_DEG); - iowrite32(reg, ep->fusb300->reg + FUSB300_OFFSET_GTM); - - for (i = 0; i < (length >> 2); i++) { - if (i * 4 == 1024) - break; - golden_value = *tmp | *(tmp + 1) << 8 | - *(tmp + 2) << 16 | *(tmp + 3) << 24; - - reg = ioread32(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4); - - if (reg != golden_value) { - printk(KERN_DEBUG "0x%x : ", (u32)(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4)); - printk(KERN_DEBUG " golden = 0x%x, reg = 0x%x\n", - golden_value, reg); - } - tmp += 4; - } - - switch (length % 4) { - case 1: - golden_value = *tmp; - case 2: - golden_value = *tmp | *(tmp + 1) << 8; - case 3: - golden_value = *tmp | *(tmp + 1) << 8 | *(tmp + 2) << 16; - default: - break; - - reg = ioread32(ep->fusb300->reg + FUSB300_OFFSET_BUFDBG_START + i*4); - if (reg != golden_value) { - printk(KERN_DEBUG "0x%x:", (u32)(ep->fusb300->reg + - FUSB300_OFFSET_BUFDBG_START + i*4)); - printk(KERN_DEBUG " golden = 0x%x, reg = 0x%x\n", - golden_value, reg); - } - } - - printk(KERN_DEBUG "fusb300_cmp_dbg_fifo : end\n"); - fusb300_disable_bit(ep->fusb300, FUSB300_OFFSET_GTM, - FUSB300_GTM_TST_FIFO_DEG); -} -#endif - static void fusb300_rdfifo(struct fusb300_ep *ep, struct fusb300_request *req, u32 length) -- cgit v1.2.3 From 6a22158c596c1531b143c884d479285ef90608d1 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 19 Jul 2011 20:21:52 +0200 Subject: usb: gadget: composite: fix bMaxPacketSize for SuperSpeed For bMaxPacketSize0 we usually take what is specified in ep0->maxpacket. This is fine in most cases, however on SuperSpeed bMaxPacketSize0 specifies the exponent instead of the actual size in bytes. The only valid value on SS is 9 which denotes 512 bytes. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/composite.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 5ef87794fd32..aef47414f5d5 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1079,10 +1079,12 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) cdev->desc.bMaxPacketSize0 = cdev->gadget->ep0->maxpacket; if (gadget_is_superspeed(gadget)) { - if (gadget->speed >= USB_SPEED_SUPER) + if (gadget->speed >= USB_SPEED_SUPER) { cdev->desc.bcdUSB = cpu_to_le16(0x0300); - else + cdev->desc.bMaxPacketSize0 = 9; + } else { cdev->desc.bcdUSB = cpu_to_le16(0x0210); + } } value = min(w_length, (u16) sizeof cdev->desc); -- cgit v1.2.3 From 74c6f3a42a5af424dd954916a5e69d00271b943a Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 17 Jul 2011 18:28:00 +0300 Subject: usb: musb: tusb6010_omap: fix build failure: error: 'musb' undeclared CC drivers/usb/musb/tusb6010_omap.o drivers/usb/musb/tusb6010_omap.c: In function 'tusb_omap_use_shared_dmareq': drivers/usb/musb/tusb6010_omap.c:92: error: 'musb' undeclared (first use in this function) drivers/usb/musb/tusb6010_omap.c:92: error: (Each undeclared identifier is reported only once drivers/usb/musb/tusb6010_omap.c:92: error: for each function it appears in.) Signed-off-by: Sergei Trofimovich Signed-off-by: Felipe Balbi --- drivers/usb/musb/tusb6010_omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index c784e6c03aac..07c8a73dfe41 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -89,7 +89,7 @@ static inline int tusb_omap_use_shared_dmareq(struct tusb_omap_dma_ch *chdat) u32 reg = musb_readl(chdat->tbase, TUSB_DMA_EP_MAP); if (reg != 0) { - dev_dbg(musb->controller, "ep%i dmareq0 is busy for ep%i\n", + dev_dbg(chdat->musb->controller, "ep%i dmareq0 is busy for ep%i\n", chdat->epnum, reg & 0xf); return -EAGAIN; } -- cgit v1.2.3 From 26e5c3e227d15a44402e1c9ab817fe48142b4b99 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Mon, 18 Jul 2011 18:38:47 +0530 Subject: usb: musb: fix Kconfig After 622859634 (usb: musb: drop a gigantic amount of ifdeferry): - USB_GADGET_MUSB_HDRC is no longer selectable because it depends on the removed USB_MUSB_PERIPHERAL and USB_MUSB_OTG options - The Kconfig comment still says "Enable Host or Gadget support to see Inventra options", even though you now need to enable both of them to see Inventra options. Fix the dependency and drop the anyway unnecessary comment. Signed-off-by: Rabin Vincent Signed-off-by: Felipe Balbi --- drivers/usb/gadget/Kconfig | 2 +- drivers/usb/musb/Kconfig | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig index 44b6b40aafb4..5a084b9cfa3c 100644 --- a/drivers/usb/gadget/Kconfig +++ b/drivers/usb/gadget/Kconfig @@ -310,7 +310,7 @@ config USB_PXA_U2O # musb builds in ../musb along with host support config USB_GADGET_MUSB_HDRC tristate "Inventra HDRC USB Peripheral (TI, ADI, ...)" - depends on USB_MUSB_HDRC && (USB_MUSB_PERIPHERAL || USB_MUSB_OTG) + depends on USB_MUSB_HDRC select USB_GADGET_DUALSPEED help This OTG-capable silicon IP is used in dual designs including diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 6192b45959f4..fc34b8b11910 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -3,9 +3,6 @@ # for silicon based on Mentor Graphics INVENTRA designs # -comment "Enable Host or Gadget support to see Inventra options" - depends on !USB && USB_GADGET=n - # (M)HDRC = (Multipoint) Highspeed Dual-Role Controller config USB_MUSB_HDRC depends on USB && USB_GADGET -- cgit v1.2.3 From 71964b9a0c06f2804be3b6ff47fab07a9468ecb4 Mon Sep 17 00:00:00 2001 From: Sebastian Bauer Date: Thu, 21 Jul 2011 15:40:07 +0200 Subject: usb: gadget: hid: don't STALL when processing a HID Descriptor request This is a patch to fix an issue with the HID gadget which, at the moment, returns STALL on a HID descriptor request. Essentially, the patch changes the hid gadget such that a request for the HID descriptor is handled by copying the descriptor into the response buffer, rather than falling through the default case, in which the request is answered by a STALL. Signed-off-by: Sebastian Bauer Acked-by: Peter Korsgaard Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_hid.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c index 403a48bcf560..83a266bdb40e 100644 --- a/drivers/usb/gadget/f_hid.c +++ b/drivers/usb/gadget/f_hid.c @@ -367,6 +367,13 @@ static int hidg_setup(struct usb_function *f, case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8 | USB_REQ_GET_DESCRIPTOR): switch (value >> 8) { + case HID_DT_HID: + VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: HID\n"); + length = min_t(unsigned short, length, + hidg_desc.bLength); + memcpy(req->buf, &hidg_desc, length); + goto respond; + break; case HID_DT_REPORT: VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n"); length = min_t(unsigned short, length, -- cgit v1.2.3 From 15154962f777e4ab38adb7641ccae92194c9a96b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 28 Jul 2011 22:59:53 +0800 Subject: usb: host: ehci-omap: fix .remove and failure handling path of .probe(v1) Obviously, disabling & put regulator and iounmap(hcd->regs) are missed in .remove and failure handling path of .probe, so add them. Signed-off-by: Ming Lei Acked-by: Alan Stern Tested-by: Keshava Munegowda Signed-off-by: Felipe Balbi --- drivers/usb/host/ehci-omap.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/usb/host/ehci-omap.c b/drivers/usb/host/ehci-omap.c index 55a57c23dd0f..45240321ca09 100644 --- a/drivers/usb/host/ehci-omap.c +++ b/drivers/usb/host/ehci-omap.c @@ -98,6 +98,18 @@ static void omap_ehci_soft_phy_reset(struct platform_device *pdev, u8 port) } } +static void disable_put_regulator( + struct ehci_hcd_omap_platform_data *pdata) +{ + int i; + + for (i = 0 ; i < OMAP3_HS_USB_PORTS ; i++) { + if (pdata->regulator[i]) { + regulator_disable(pdata->regulator[i]); + regulator_put(pdata->regulator[i]); + } + } +} /* configure so an HC device and id are always provided */ /* always called with process context; sleeping is OK */ @@ -231,9 +243,11 @@ err_add_hcd: omap_usbhs_disable(dev); err_enable: + disable_put_regulator(pdata); usb_put_hcd(hcd); err_io: + iounmap(regs); return ret; } @@ -253,6 +267,8 @@ static int ehci_hcd_omap_remove(struct platform_device *pdev) usb_remove_hcd(hcd); omap_usbhs_disable(dev); + disable_put_regulator(dev->platform_data); + iounmap(hcd->regs); usb_put_hcd(hcd); return 0; } -- cgit v1.2.3 From 93e098a8fc02c579875e64001f7a511b7e75a16c Mon Sep 17 00:00:00 2001 From: John Stultz Date: Wed, 20 Jul 2011 17:09:34 -0700 Subject: usb: musb: fix oops on musb_gadget_pullup an 'unhandled fault' is causes when a gadget driver calls usb_gadget_connect() while the USB cable isn't plugged into the OTG port. the fault is caused by an access to MUSB's memory space while its clock is turned off due to pm_runtime kicking in. in order to fix the fault, we enclose musb_gadget_pullup() with pm_runtime_get_sync() ... pm_runtime_put() calls to be sure we will always reach that path with clock turned on. [ balbi@ti.com : simplified commit log; removed few things which didn't belong there ] Cc: stable@kernel.org Reported-by: Zach Pfeffer Signed-off-by: John Stultz Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index b67a062f556b..8c41a2e6ea77 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1698,6 +1698,8 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) is_on = !!is_on; + pm_runtime_get_sync(musb->controller); + /* NOTE: this assumes we are sensing vbus; we'd rather * not pullup unless the B-session is active. */ @@ -1707,6 +1709,9 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on) musb_pullup(musb, is_on); } spin_unlock_irqrestore(&musb->lock, flags); + + pm_runtime_put(musb->controller); + return 0; } -- cgit v1.2.3 From d366d39bab562545ccb4a5931d62d0fd9e6a8ffc Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Tue, 2 Aug 2011 17:33:39 +0200 Subject: usb: musb: ux500: set dma config for both src and dst The dma driver requires both src and dst to be set. This fix is needed in order to run gadget mass storage. Patch is verified on snowball. Signed-off-by: Per Forlin Acked-by: Mian Yousaf Kaukab Acked-by: Linus Walleij Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500_dma.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c index cecace411832..23134754b7c0 100644 --- a/drivers/usb/musb/ux500_dma.c +++ b/drivers/usb/musb/ux500_dma.c @@ -133,15 +133,13 @@ static bool ux500_configure_channel(struct dma_channel *channel, DMA_SLAVE_BUSWIDTH_4_BYTES; slave_conf.direction = direction; - if (direction == DMA_FROM_DEVICE) { - slave_conf.src_addr = usb_fifo_addr; - slave_conf.src_addr_width = addr_width; - slave_conf.src_maxburst = 16; - } else { - slave_conf.dst_addr = usb_fifo_addr; - slave_conf.dst_addr_width = addr_width; - slave_conf.dst_maxburst = 16; - } + slave_conf.src_addr = usb_fifo_addr; + slave_conf.src_addr_width = addr_width; + slave_conf.src_maxburst = 16; + slave_conf.dst_addr = usb_fifo_addr; + slave_conf.dst_addr_width = addr_width; + slave_conf.dst_maxburst = 16; + dma_chan->device->device_control(dma_chan, DMA_SLAVE_CONFIG, (unsigned long) &slave_conf); -- cgit v1.2.3 From afbd0749c0507d5fea980b3bfa76efc43af83d60 Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Wed, 3 Aug 2011 14:22:17 +0200 Subject: usb: musb: ux500: replace missing DBG with dev_dbg ux500_dma.c fail to compile becase DBG has been removed from musb_debug. Use dev_dbg for all prints. Cc: stable@vger.kernel.org Signed-off-by: Per Forlin Acked-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi --- drivers/usb/musb/ux500_dma.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/usb/musb/ux500_dma.c b/drivers/usb/musb/ux500_dma.c index 23134754b7c0..ef4333f4bbe0 100644 --- a/drivers/usb/musb/ux500_dma.c +++ b/drivers/usb/musb/ux500_dma.c @@ -65,7 +65,8 @@ static void ux500_tx_work(struct work_struct *data) struct musb *musb = hw_ep->musb; unsigned long flags; - DBG(4, "DMA tx transfer done on hw_ep=%d\n", hw_ep->epnum); + dev_dbg(musb->controller, "DMA tx transfer done on hw_ep=%d\n", + hw_ep->epnum); spin_lock_irqsave(&musb->lock, flags); ux500_channel->channel.actual_len = ux500_channel->cur_len; @@ -84,7 +85,8 @@ static void ux500_rx_work(struct work_struct *data) struct musb *musb = hw_ep->musb; unsigned long flags; - DBG(4, "DMA rx transfer done on hw_ep=%d\n", hw_ep->epnum); + dev_dbg(musb->controller, "DMA rx transfer done on hw_ep=%d\n", + hw_ep->epnum); spin_lock_irqsave(&musb->lock, flags); ux500_channel->channel.actual_len = ux500_channel->cur_len; @@ -116,9 +118,11 @@ static bool ux500_configure_channel(struct dma_channel *channel, enum dma_slave_buswidth addr_width; dma_addr_t usb_fifo_addr = (MUSB_FIFO_OFFSET(hw_ep->epnum) + ux500_channel->controller->phy_base); + struct musb *musb = ux500_channel->controller->private_data; - DBG(4, "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n", - packet_sz, mode, dma_addr, len, ux500_channel->is_tx); + dev_dbg(musb->controller, + "packet_sz=%d, mode=%d, dma_addr=0x%x, len=%d is_tx=%d\n", + packet_sz, mode, dma_addr, len, ux500_channel->is_tx); ux500_channel->cur_len = len; @@ -164,6 +168,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, struct ux500_dma_controller *controller = container_of(c, struct ux500_dma_controller, controller); struct ux500_dma_channel *ux500_channel = NULL; + struct musb *musb = controller->private_data; u8 ch_num = hw_ep->epnum - 1; u32 max_ch; @@ -190,7 +195,7 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, ux500_channel->hw_ep = hw_ep; ux500_channel->is_allocated = 1; - DBG(7, "hw_ep=%d, is_tx=0x%x, channel=%d\n", + dev_dbg(musb->controller, "hw_ep=%d, is_tx=0x%x, channel=%d\n", hw_ep->epnum, is_tx, ch_num); return &(ux500_channel->channel); @@ -199,8 +204,9 @@ static struct dma_channel *ux500_dma_channel_allocate(struct dma_controller *c, static void ux500_dma_channel_release(struct dma_channel *channel) { struct ux500_dma_channel *ux500_channel = channel->private_data; + struct musb *musb = ux500_channel->controller->private_data; - DBG(7, "channel=%d\n", ux500_channel->ch_num); + dev_dbg(musb->controller, "channel=%d\n", ux500_channel->ch_num); if (ux500_channel->is_allocated) { ux500_channel->is_allocated = 0; @@ -250,8 +256,8 @@ static int ux500_dma_channel_abort(struct dma_channel *channel) void __iomem *epio = musb->endpoints[ux500_channel->hw_ep->epnum].regs; u16 csr; - DBG(4, "channel=%d, is_tx=%d\n", ux500_channel->ch_num, - ux500_channel->is_tx); + dev_dbg(musb->controller, "channel=%d, is_tx=%d\n", + ux500_channel->ch_num, ux500_channel->is_tx); if (channel->status == MUSB_DMA_STATUS_BUSY) { if (ux500_channel->is_tx) { -- cgit v1.2.3 From f847a79ab3c1faca3022061045cd22e4678c1b1c Mon Sep 17 00:00:00 2001 From: Per Forlin Date: Wed, 3 Aug 2011 15:39:15 +0200 Subject: usb: musb: cppi: fix build errors due to DBG and missing musb variable Replace DBG with dev_dbg and fix invalid access of musb->controller. With this patch cppi_dma builds successfully. Cc: Signed-off-by: Per Forlin Signed-off-by: Felipe Balbi --- drivers/usb/musb/cppi_dma.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c index 149f3f310a0a..318fb4e8a885 100644 --- a/drivers/usb/musb/cppi_dma.c +++ b/drivers/usb/musb/cppi_dma.c @@ -226,8 +226,10 @@ static int cppi_controller_stop(struct dma_controller *c) struct cppi *controller; void __iomem *tibase; int i; + struct musb *musb; controller = container_of(c, struct cppi, controller); + musb = controller->musb; tibase = controller->tibase; /* DISABLE INDIVIDUAL CHANNEL Interrupts */ @@ -289,9 +291,11 @@ cppi_channel_allocate(struct dma_controller *c, u8 index; struct cppi_channel *cppi_ch; void __iomem *tibase; + struct musb *musb; controller = container_of(c, struct cppi, controller); tibase = controller->tibase; + musb = controller->musb; /* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */ index = ep->epnum - 1; @@ -339,7 +343,8 @@ static void cppi_channel_release(struct dma_channel *channel) c = container_of(channel, struct cppi_channel, channel); tibase = c->controller->tibase; if (!c->hw_ep) - dev_dbg(musb->controller, "releasing idle DMA channel %p\n", c); + dev_dbg(c->controller->musb->controller, + "releasing idle DMA channel %p\n", c); else if (!c->transmit) core_rxirq_enable(tibase, c->index + 1); @@ -357,10 +362,11 @@ cppi_dump_rx(int level, struct cppi_channel *c, const char *tag) musb_ep_select(base, c->index + 1); - DBG(level, "RX DMA%d%s: %d left, csr %04x, " - "%08x H%08x S%08x C%08x, " - "B%08x L%08x %08x .. %08x" - "\n", + dev_dbg(c->controller->musb->controller, + "RX DMA%d%s: %d left, csr %04x, " + "%08x H%08x S%08x C%08x, " + "B%08x L%08x %08x .. %08x" + "\n", c->index, tag, musb_readl(c->controller->tibase, DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index), @@ -387,10 +393,11 @@ cppi_dump_tx(int level, struct cppi_channel *c, const char *tag) musb_ep_select(base, c->index + 1); - DBG(level, "TX DMA%d%s: csr %04x, " - "H%08x S%08x C%08x %08x, " - "F%08x L%08x .. %08x" - "\n", + dev_dbg(c->controller->musb->controller, + "TX DMA%d%s: csr %04x, " + "H%08x S%08x C%08x %08x, " + "F%08x L%08x .. %08x" + "\n", c->index, tag, musb_readw(c->hw_ep->regs, MUSB_TXCSR), @@ -1022,6 +1029,7 @@ static bool cppi_rx_scan(struct cppi *cppi, unsigned ch) int i; dma_addr_t safe2ack; void __iomem *regs = rx->hw_ep->regs; + struct musb *musb = cppi->musb; cppi_dump_rx(6, rx, "/K"); -- cgit v1.2.3 From cf6808cb09e72fa7ee8713bf48219a2eb98da91b Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Wed, 3 Aug 2011 21:41:26 -0700 Subject: usb: gadget: renesas_usbhs: fix DMA build by including dma-mapping.h Include dma-mapping.h to fix build of the renesas_usbhs driver | CC drivers/usb/renesas_usbhs/mod_gadget.o | drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_map': | drivers/usb/renesas_usbhs/mod_gadget.c:190: error: implicit declaration of function 'dma_map_single' | drivers/usb/renesas_usbhs/mod_gadget.c:192: error: implicit declaration of function 'dma_sync_single_for_device' | drivers/usb/renesas_usbhs/mod_gadget.c:196: error: implicit declaration of function 'dma_mapping_error' | drivers/usb/renesas_usbhs/mod_gadget.c: In function 'usbhsg_dma_unmap': | drivers/usb/renesas_usbhs/mod_gadget.c:217: error: implicit declaration of function 'dma_unmap_single' | drivers/usb/renesas_usbhs/mod_gadget.c:219: error: implicit declaration of function 'dma_sync_single_for_cpu' | make[5]: *** [drivers/usb/renesas_usbhs/mod_gadget.o] Error 1 | make[4]: *** [drivers/usb/renesas_usbhs] Error 2 Reported-by: Magnus Damm Signed-off-by: Kuninori Morimoto Signed-off-by: Felipe Balbi --- drivers/usb/renesas_usbhs/mod_gadget.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index ba79dbf5adbc..e7101dc31e41 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -14,6 +14,7 @@ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ +#include #include #include #include -- cgit v1.2.3 From 240a16e2cd831cb25361b1d1797bd04e8faf8b4f Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 5 Aug 2011 13:29:49 +0300 Subject: usb: musb: tusb6010: fix compilation earlier commits have broken compilation of tusb6010 glue layer, fix it. Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_core.h | 12 ++++++++---- drivers/usb/musb/musb_regs.h | 6 ++++-- drivers/usb/musb/tusb6010.c | 1 + drivers/usb/musb/tusb6010_omap.c | 1 + 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h index 668eeef601ae..b3c065ab9dbc 100644 --- a/drivers/usb/musb/musb_core.h +++ b/drivers/usb/musb/musb_core.h @@ -172,7 +172,8 @@ enum musb_g_ep0_state { #endif /* TUSB mapping: "flat" plus ep0 special cases */ -#if defined(CONFIG_USB_MUSB_TUSB6010) +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) #define musb_ep_select(_mbase, _epnum) \ musb_writeb((_mbase), MUSB_INDEX, (_epnum)) #define MUSB_EP_OFFSET MUSB_TUSB_OFFSET @@ -241,7 +242,8 @@ struct musb_hw_ep { void __iomem *fifo; void __iomem *regs; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) void __iomem *conf; #endif @@ -258,7 +260,8 @@ struct musb_hw_ep { struct dma_channel *tx_channel; struct dma_channel *rx_channel; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) /* TUSB has "asynchronous" and "synchronous" dma modes */ dma_addr_t fifo_async; dma_addr_t fifo_sync; @@ -356,7 +359,8 @@ struct musb { void __iomem *ctrl_base; void __iomem *mregs; -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) dma_addr_t async; dma_addr_t sync; void __iomem *sync_va; diff --git a/drivers/usb/musb/musb_regs.h b/drivers/usb/musb/musb_regs.h index 82410703dcd3..03f2655af290 100644 --- a/drivers/usb/musb/musb_regs.h +++ b/drivers/usb/musb/musb_regs.h @@ -234,7 +234,8 @@ #define MUSB_TESTMODE 0x0F /* 8 bit */ /* Get offset for a given FIFO from musb->mregs */ -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) #define MUSB_FIFO_OFFSET(epnum) (0x200 + ((epnum) * 0x20)) #else #define MUSB_FIFO_OFFSET(epnum) (0x20 + ((epnum) * 4)) @@ -295,7 +296,8 @@ #define MUSB_FLAT_OFFSET(_epnum, _offset) \ (0x100 + (0x10*(_epnum)) + (_offset)) -#ifdef CONFIG_USB_MUSB_TUSB6010 +#if defined(CONFIG_USB_MUSB_TUSB6010) || \ + defined(CONFIG_USB_MUSB_TUSB6010_MODULE) /* TUSB6010 EP0 configuration register is special */ #define MUSB_TUSB_OFFSET(_epnum, _offset) \ (0x10 + _offset) diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 9eec41fbf3a4..ec1480191f78 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/musb/tusb6010_omap.c b/drivers/usb/musb/tusb6010_omap.c index 07c8a73dfe41..b67b4bc596c1 100644 --- a/drivers/usb/musb/tusb6010_omap.c +++ b/drivers/usb/musb/tusb6010_omap.c @@ -20,6 +20,7 @@ #include #include "musb_core.h" +#include "tusb6010.h" #define to_chdat(c) ((struct tusb_omap_dma_ch *)(c)->private_data) -- cgit v1.2.3 From ad50c1b20ff27780afbb8bcd3d153393d360419e Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Fri, 5 Aug 2011 17:33:05 +0800 Subject: usb: musb: blackfin: include prefetch head file After the prefetch/list.h restructure, drivers need to explicitly include linux/prefetch.h in order to use the prefetch() function. Otherwise, the current driver fails to build: drivers/usb/musb/blackfin.c: In function 'musb_write_fifo': drivers/usb/musb/blackfin.c:43: error: implicit declaration of function 'prefetch' Signed-off-by: Bob Liu Signed-off-by: Felipe Balbi --- drivers/usb/musb/blackfin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c index ae8c39617743..5e7cfba5b079 100644 --- a/drivers/usb/musb/blackfin.c +++ b/drivers/usb/musb/blackfin.c @@ -17,6 +17,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From bb8070c29ca87ac0aa24e04a1207cc932f62258f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Aug 2011 22:14:46 +0200 Subject: usb: gadget: f_phonet: unlock in error case Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/gadget/f_phonet.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c index 8f8d3f6cd89e..8f3eab1af885 100644 --- a/drivers/usb/gadget/f_phonet.c +++ b/drivers/usb/gadget/f_phonet.c @@ -434,6 +434,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt) config_ep_by_speed(gadget, f, fp->out_ep)) { fp->in_ep->desc = NULL; fp->out_ep->desc = NULL; + spin_unlock(&port->lock); return -EINVAL; } usb_ep_enable(fp->out_ep); -- cgit v1.2.3 From 6193d6997c90535af8f8491fc0019f785a3322b0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 10 Aug 2011 11:01:57 +0200 Subject: usb: musb: gadget: fix error path In case one "forgot" to load the receiver i.e. doing |modprobe omap2430 |modprobe musb_hdrc he ends up with: |musb-hdrc: version 6.0, ?dma?, otg (peripheral+host) |HS USB OTG: no transceiver configured |musb-hdrc musb-hdrc: musb_init_controller failed with status -19 |(NULL device *): gadget not registered. |Unable to handle kernel NULL pointer dereference at virtual address 0000001c |Internal error: Oops: 17 [#1] SMP |[] (sysfs_find_dirent+0x4/0x60) from [] (sysfs_get_dirent+0x28/0x78) |[] (sysfs_get_dirent+0x28/0x78) from [] (sysfs_unmerge_group+0x1c/0x90) |[] (sysfs_unmerge_group+0x1c/0x90) from [] (dpm_sysfs_remove+0x14/0x3c) |[] (dpm_sysfs_remove+0x14/0x3c) from [] (device_del+0x40/0x1b4) |[] (device_del+0x40/0x1b4) from [] (device_unregister+0xc/0x18) |[] (device_unregister+0xc/0x18) from [] (musb_free+0x24/0x88 [musb_hdrc]) |[] (musb_free+0x24/0x88 [musb_hdrc]) from [] (musb_probe+0xb50/0xe3c [musb_hdrc]) |[] (musb_probe+0xb50/0xe3c [musb_hdrc]) from [] (platform_drv_probe+0x1c/0x24) The problem is that musb_free() tries to figure out what was initializued and what wasn't and clean up only the initialized part. This works well for usb_del_gadget_udc() but device_unregister() can't deal with it. Therefore we rely on the fact the we always have a parent device and only then remove the device. I broke this in 0f91349 ("usb: gadget: convert all users to the new udc infrastructure") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Felipe Balbi --- drivers/usb/musb/musb_gadget.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index 8c41a2e6ea77..e81820370d6f 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1856,6 +1856,7 @@ int __init musb_gadget_setup(struct musb *musb) return 0; err: + musb->g.dev.parent = NULL; device_unregister(&musb->g.dev); return status; } @@ -1863,7 +1864,8 @@ err: void musb_gadget_cleanup(struct musb *musb) { usb_del_gadget_udc(&musb->g); - device_unregister(&musb->g.dev); + if (musb->g.dev.parent) + device_unregister(&musb->g.dev); } /* -- cgit v1.2.3 From 73104b5cfe3067d68f2c2de3f3d4d4964c55873e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 9 Aug 2011 17:09:06 +0000 Subject: drm/radeon/kms: don't enable connectors that are off in the hotplug handler If we get a hotplug event on an connector that is off, don't attempt to turn it on or off, it should already be off. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=728228 Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 6d6b5f16bc09..519b5e2f1ee8 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -60,6 +60,10 @@ void radeon_connector_hotplug(struct drm_connector *connector) radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd); + /* if the connector is already off, don't turn it back on */ + if (connector->dpms != DRM_MODE_DPMS_ON) + return; + /* powering up/down the eDP panel generates hpd events which * can interfere with modesetting. */ -- cgit v1.2.3 From 33ae1827d6c3c79c5957536ec29d5a8780623147 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 11 Aug 2011 14:01:03 +0000 Subject: drm/radeon/kms: fix regression is handling >2 heads on cedar/caicos Need to add support for 4 crtcs when setting the possible crtcs for the encoders. Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_encoders.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c index b293487e5aa3..319d85d7e759 100644 --- a/drivers/gpu/drm/radeon/radeon_encoders.c +++ b/drivers/gpu/drm/radeon/radeon_encoders.c @@ -2323,6 +2323,9 @@ radeon_add_atom_encoder(struct drm_device *dev, default: encoder->possible_crtcs = 0x3; break; + case 4: + encoder->possible_crtcs = 0xf; + break; case 6: encoder->possible_crtcs = 0x3f; break; -- cgit v1.2.3 From 92bdfd4a35415dd3741b95df60782a32c586d399 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 4 Aug 2011 17:28:40 +0000 Subject: drm/radeon/kms: make some watermark messages debug only Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 14dce9f22172..fb5fa0898868 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -743,7 +743,7 @@ static void evergreen_program_watermarks(struct radeon_device *rdev, !evergreen_average_bandwidth_vs_available_bandwidth(&wm) || !evergreen_check_latency_hiding(&wm) || (rdev->disp_priority == 2)) { - DRM_INFO("force priority to high\n"); + DRM_DEBUG_KMS("force priority to high\n"); priority_a_cnt |= PRIORITY_ALWAYS_ON; priority_b_cnt |= PRIORITY_ALWAYS_ON; } -- cgit v1.2.3 From 13bb9430cd6154d1f088549656c4a3ed10eaf35e Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 8 Aug 2011 16:21:15 +0000 Subject: drm/radeon: Allow panel preferred EDID to override BIOS native mode We have two sources of information about panel capabilities on mobile radeon - the BIOS, which gives us a native mode, and the panel's preferred mode. In theory these two will always match, but there's some corner cases where the BIOS hasn't been fully initialised and so the native mode in it ends up with default values. However, if we get a panel with reasonable EDID, it's probably the case that the panel's preferred mode does actually represent the panel capabilities. This patch handles that case by replacing the native mode with the panel's preferred mode if the resolutions don't match. Systems without a valid internal panel EDID will still use the BIOS native mode. Signed-off-by: Matthew Garrett Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 519b5e2f1ee8..441e07054853 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -478,11 +478,19 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, { struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder); struct drm_display_mode *native_mode = &radeon_encoder->native_mode; + struct drm_display_mode *t, *mode; + + /* If the EDID preferred mode doesn't match the native mode, use it */ + list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { + if (mode->type & DRM_MODE_TYPE_PREFERRED) { + if (mode->hdisplay != native_mode->hdisplay || + mode->vdisplay != native_mode->vdisplay) + memcpy(native_mode, mode, sizeof(*mode)); + } + } /* Try to get native mode details from EDID if necessary */ if (!native_mode->clock) { - struct drm_display_mode *t, *mode; - list_for_each_entry_safe(mode, t, &connector->probed_modes, head) { if (mode->hdisplay == native_mode->hdisplay && mode->vdisplay == native_mode->vdisplay) { @@ -493,6 +501,7 @@ static void radeon_fixup_lvds_native_mode(struct drm_encoder *encoder, } } } + if (!native_mode->clock) { DRM_DEBUG_KMS("No LVDS native mode details, disabling RMX\n"); radeon_encoder->rmx_type = RMX_OFF; -- cgit v1.2.3 From bcc65fd8e929a9d9d34d814d6efc1d2793546922 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Mon, 8 Aug 2011 16:21:16 +0000 Subject: drm/radeon: re-POST the asic on Apple hardware when booted via EFI At least some Apples program the GPU into a state that wedges the engine once userspace starts trying to perform accelerated operations. Executing the Atom init scripts gets the hardware back into a working state. The same hardware works fine when booted via BIOS emulation, so let's just execute the init scripts on Apples when we're using EFI. Signed-off-by: Matthew Garrett Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 440e6ecccc40..a3b011b49465 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "radeon_reg.h" #include "radeon.h" #include "atom.h" @@ -348,6 +349,9 @@ bool radeon_card_posted(struct radeon_device *rdev) { uint32_t reg; + if (efi_enabled && rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) + return false; + /* first check CRTCs */ if (ASIC_IS_DCE41(rdev)) { reg = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET) | -- cgit v1.2.3 From e9b52ef2228cd0bed7a4465c693a39489e2c338d Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Fri, 12 Aug 2011 00:55:37 +0400 Subject: perf: fix temporary file ownership check A file in /tmp/ might be a symlink, so lstat() should be used instead of stat(). Acked-by: Pekka Enberg Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110811205537.GA22864@albatros Signed-off-by: Vasiliy Kulikov Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/symbol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index e142c21ae9a5..469c0264ed29 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1506,7 +1506,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) if (strncmp(dso->name, "/tmp/perf-", 10) == 0) { struct stat st; - if (stat(dso->name, &st) < 0) + if (lstat(dso->name, &st) < 0) return -1; if (st.st_uid && (st.st_uid != geteuid())) { -- cgit v1.2.3 From 8afa2a707d3d1320df5d35966729ac5262da737d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:29 +0900 Subject: perf probe: Fix a memory leak for scopes array Fix a memory leak for scopes array when it finds a variable in the global scope. Reviewed-by: Pekka Enberg Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110229.19900.63019.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 3e44a3e36519..573c72363223 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -660,6 +660,7 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) else { /* Search upper class */ nscopes = dwarf_getscopes_die(sp_die, &scopes); + ret = -ENOENT; while (nscopes-- > 1) { pr_debug("Searching variables in %s\n", dwarf_diename(&scopes[nscopes])); @@ -668,14 +669,12 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) pf->pvar->var, 0, &vr_die)) { ret = convert_variable(&vr_die, pf); - goto found; + break; } } if (scopes) free(scopes); - ret = -ENOENT; } -found: if (ret < 0) pr_warning("Failed to find '%s' in this function.\n", pf->pvar->var); -- cgit v1.2.3 From a128405c6b40371c59c34b00cc66ed06285b9551 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:35 +0900 Subject: perf probe: Fix line walker to check CU correctly Fix line walker to check whether a given DIE is CU or not. Actually this function accepts CU, subprogram and inlined_subroutine DIEs. Without this fix, perf probe always fails to analyze lines on inlined functions; $ perf probe -L pre_schedule Debuginfo analysis failed. (-2) Error: Failed to show lines. (-2) This fixes that bug, as below. $ perf probe -L pre_schedule 0 static inline void pre_schedule(struct rq *rq, struct task_struct *prev { 2 if (prev->sched_class->pre_schedule) 3 prev->sched_class->pre_schedule(rq, prev); } /* rq->lock is NOT held, but preemption is disabled */ Changes from v1: - Update against current tip tree.(Fix dwarf-aux.c) Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110235.19900.20614.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index fddf40f30d3e..d35b454f98b8 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -439,7 +439,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) /** * die_walk_lines - Walk on lines inside given DIE - * @rt_die: a root DIE (CU or subprogram) + * @rt_die: a root DIE (CU, subprogram or inlined_subroutine) * @callback: callback routine * @data: user data * @@ -460,12 +460,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) size_t nlines, i; /* Get the CU die */ - if (dwarf_tag(rt_die) == DW_TAG_subprogram) + if (dwarf_tag(rt_die) != DW_TAG_compile_unit) cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); else cu_die = rt_die; if (!cu_die) { - pr_debug2("Failed to get CU from subprogram\n"); + pr_debug2("Failed to get CU from given DIE.\n"); return -EINVAL; } -- cgit v1.2.3 From b0e9cb2802d4bf50955cca8a7d87cf94ebf750a5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:41 +0900 Subject: perf probe: Fix to search nested inlined functions in CU Fix perf probe to walk through the lines of all nested inlined function call sites and declared lines when a whole CU is passed to the line walker. The die_walk_lines() can have two different type of DIEs, subprogram (or inlined-subroutine) DIE and CU DIE. If a caller passes a subprogram DIE, this means that the walker walk on lines of given subprogram. In this case, it just needs to search on direct children of DIE tree for finding call-site information of inlined function which directly called from given subprogram. On the other hand, if a caller passes a CU DIE to the walker, this means that the walker have to walk on all lines in the source files included in given CU DIE. In this case, it has to search whole DIE trees of all subprograms to find the call-site information of all nested inlined functions. Without this patch: $ perf probe --line kernel/cpu.c:151-157 static int cpu_notify(unsigned long val, void *v) { 154 return __cpu_notify(val, v, -1, NULL); } With this: $ perf probe --line kernel/cpu.c:151-157 152 static int cpu_notify(unsigned long val, void *v) { 154 return __cpu_notify(val, v, -1, NULL); } As you can see, --line option with source line range shows the declared lines as probe-able. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110241.19900.34994.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 91 +++++++++++++++++++++++++++++++++++++++------ tools/perf/util/dwarf-aux.h | 3 ++ 2 files changed, 82 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index d35b454f98b8..d9b8ad098498 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -198,6 +198,19 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, return 0; } +/* Get attribute and translate it as a sdata */ +static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, + Dwarf_Sword *result) +{ + Dwarf_Attribute attr; + + if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + dwarf_formsdata(&attr, result) != 0) + return -ENOENT; + + return 0; +} + /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -250,6 +263,39 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) return 0; } +/* Get the call file index number in CU DIE */ +static int die_get_call_fileno(Dwarf_Die *in_die) +{ + Dwarf_Sword idx; + + if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +} + +/** + * die_get_call_file - Get callsite file name of inlined function instance + * @in_die: a DIE of an inlined function instance + * + * Get call-site file name of @in_die. This means from which file the inline + * function is called. + */ +const char *die_get_call_file(Dwarf_Die *in_die) +{ + Dwarf_Die cu_die; + Dwarf_Files *files; + int idx; + + idx = die_get_call_fileno(in_die); + if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) || + dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) + return NULL; + + return dwarf_filesrc(files, idx, NULL, NULL); +} + + /** * die_find_child - Generic DIE search function in DIE tree * @rt_die: a root DIE @@ -376,7 +422,7 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, /* Line walker internal parameters */ struct __line_walk_param { - const char *fname; + bool recursive; line_walk_callback_t callback; void *data; int retval; @@ -385,39 +431,56 @@ struct __line_walk_param { static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) { struct __line_walk_param *lw = data; - Dwarf_Addr addr; + Dwarf_Addr addr = 0; + const char *fname; int lineno; if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) { + fname = die_get_call_file(in_die); lineno = die_get_call_lineno(in_die); - if (lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { - lw->retval = lw->callback(lw->fname, lineno, addr, - lw->data); + if (fname && lineno > 0 && dwarf_entrypc(in_die, &addr) == 0) { + lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) return DIE_FIND_CB_END; } } - return DIE_FIND_CB_SIBLING; + if (!lw->recursive) + /* Don't need to search recursively */ + return DIE_FIND_CB_SIBLING; + + if (addr) { + fname = dwarf_decl_file(in_die); + if (fname && dwarf_decl_line(in_die, &lineno) == 0) { + lw->retval = lw->callback(fname, lineno, addr, lw->data); + if (lw->retval != 0) + return DIE_FIND_CB_END; + } + } + + /* Continue to search nested inlined function call-sites */ + return DIE_FIND_CB_CONTINUE; } /* Walk on lines of blocks included in given DIE */ -static int __die_walk_funclines(Dwarf_Die *sp_die, +static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, line_walk_callback_t callback, void *data) { struct __line_walk_param lw = { + .recursive = recursive, .callback = callback, .data = data, .retval = 0, }; Dwarf_Die die_mem; Dwarf_Addr addr; + const char *fname; int lineno; /* Handle function declaration line */ - lw.fname = dwarf_decl_file(sp_die); - if (lw.fname && dwarf_decl_line(sp_die, &lineno) == 0 && + fname = dwarf_decl_file(sp_die); + if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && dwarf_entrypc(sp_die, &addr) == 0) { - lw.retval = callback(lw.fname, lineno, addr, data); + lw.retval = callback(fname, lineno, addr, data); if (lw.retval != 0) goto done; } @@ -430,7 +493,7 @@ static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data) { struct __line_walk_param *lw = data; - lw->retval = __die_walk_funclines(sp_die, lw->callback, lw->data); + lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data); if (lw->retval != 0) return DWARF_CB_ABORT; @@ -509,7 +572,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) * subroutines. We have to check functions list or given function. */ if (rt_die != cu_die) - ret = __die_walk_funclines(rt_die, callback, data); + /* + * Don't need walk functions recursively, because nested + * inlined functions don't have lines of the specified DIE. + */ + ret = __die_walk_funclines(rt_die, false, callback, data); else { struct __line_walk_param param = { .callback = callback, diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index bc3b21167e70..c8e491bc133f 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -40,6 +40,9 @@ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); /* Get callsite line number of inline-function instance */ extern int die_get_call_lineno(Dwarf_Die *in_die); +/* Get callsite file name of inlined function instance */ +extern const char *die_get_call_file(Dwarf_Die *in_die); + /* Get type die */ extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); -- cgit v1.2.3 From 36c0c588b9ea979b619d6ddced410f9551e4c5fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:47 +0900 Subject: perf probe: Fix to walk all inline instances Fix line-range collector to walk all instances of inlined function, because some execution paths can be optimized out depending on the function argument of instances. E.g.) inline_func (arg) { if (arg) do_something; else do_another; } func_A() { inline_func(1) } func_B() { inline_func(0) } In this case, func_A may have only do_something code and func_B may have only do_another. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Masami Hiramatsu Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110247.19900.93702.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 573c72363223..d6d57682473a 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -1393,7 +1393,13 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data) struct dwarf_callback_param *param = data; param->retval = find_line_range_by_line(in_die, param->data); - return DWARF_CB_ABORT; /* No need to find other instances */ + + /* + * We have to check all instances of inlined function, because + * some execution paths can be optimized out depends on the + * function argument of instances + */ + return DWARF_CB_OK; } /* Search function from function name */ -- cgit v1.2.3 From 13e27d7686c457c625242fc2c20be30eef942408 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:53 +0900 Subject: perf probe: Warn when more than one line are given Check multiple --lines option and print warning informing that only the first specified --line option is valid. Changes from the 1st post: - Accept only the first option instead of the last. - Fix warning message according to David's comment. - Mark as a bugfix. Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110253.19900.96192.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-probe.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 5f2a5c7046df..710ae3d0a489 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -134,10 +134,18 @@ static int opt_show_lines(const struct option *opt __used, { int ret = 0; - if (str) - ret = parse_line_range_desc(str, ¶ms.line_range); - INIT_LIST_HEAD(¶ms.line_range.line_list); + if (!str) + return 0; + + if (params.show_lines) { + pr_warning("Warning: more than one --line options are" + " detected. Only the first one is valid.\n"); + return 0; + } + params.show_lines = true; + ret = parse_line_range_desc(str, ¶ms.line_range); + INIT_LIST_HEAD(¶ms.line_range.line_list); return ret; } -- cgit v1.2.3 From 221d061182b8ff5507d5768aeeecbc74f01c5dfa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:02:59 +0900 Subject: perf probe: Fix to search local variables in appropriate scope Fix perf probe to search local variables in appropriate local inlined function scope. For example, pre_schedule() has only 2 local variables, as below; $ perf probe -L pre_schedule 0 static inline void pre_schedule(struct rq *rq, struct task_struct *prev) { 2 if (prev->sched_class->pre_schedule) 3 prev->sched_class->pre_schedule(rq, prev); } However, current perf probe shows 4 local variables on pre_schedule(), because it searches variables in the caller(schedule()) scope. $ perf probe -V pre_schedule Available variables at pre_schedule @ int cpu long unsigned int* switch_count struct rq* rq struct task_struct* prev This patch fixes this issue by searching variables in the local scope of the instance of inlined function. Here is the result. $ perf probe -V pre_schedule Available variables at pre_schedule @ struct rq* rq struct task_struct* prev Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110259.19900.85664.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 33 +++++++++++ tools/perf/util/dwarf-aux.h | 4 ++ tools/perf/util/probe-finder.c | 132 +++++++++++++++++++++++++++++++++-------- tools/perf/util/probe-finder.h | 2 +- 4 files changed, 144 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index d9b8ad098498..425703a58638 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -96,6 +96,39 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr, return *lineno ?: -ENOENT; } +static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data); + +/** + * cu_walk_functions_at - Walk on function DIEs at given address + * @cu_die: A CU DIE + * @addr: An address + * @callback: A callback which called with found DIEs + * @data: A user data + * + * Walk on function DIEs at given @addr in @cu_die. Passed DIEs + * should be subprogram or inlined-subroutines. + */ +int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, + int (*callback)(Dwarf_Die *, void *), void *data) +{ + Dwarf_Die die_mem; + Dwarf_Die *sc_die; + int ret = -ENOENT; + + /* Inlined function could be recursive. Trace it until fail */ + for (sc_die = die_find_realfunc(cu_die, addr, &die_mem); + sc_die != NULL; + sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr, + &die_mem)) { + ret = callback(sc_die, data); + if (ret) + break; + } + + return ret; + +} + /** * die_compare_name - Compare diename and tname * @dw_die: a DIE diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index c8e491bc133f..6f46106fd1d5 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -34,6 +34,10 @@ extern const char *cu_get_comp_dir(Dwarf_Die *cu_die); extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, const char **fname, int *lineno); +/* Walk on funcitons at given address */ +extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, + int (*callback)(Dwarf_Die *, void *), void *data); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index d6d57682473a..5c83b7d3d8ef 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -612,8 +612,8 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) return ret; } -/* Find a variable in a subprogram die */ -static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) +/* Find a variable in a scope DIE */ +static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) { Dwarf_Die vr_die, *scopes; char buf[32], *ptr; @@ -655,11 +655,11 @@ static int find_variable(Dwarf_Die *sp_die, struct probe_finder *pf) pr_debug("Searching '%s' variable in context.\n", pf->pvar->var); /* Search child die for local variables and parameters. */ - if (die_find_variable_at(sp_die, pf->pvar->var, pf->addr, &vr_die)) + if (die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) ret = convert_variable(&vr_die, pf); else { /* Search upper class */ - nscopes = dwarf_getscopes_die(sp_die, &scopes); + nscopes = dwarf_getscopes_die(sc_die, &scopes); ret = -ENOENT; while (nscopes-- > 1) { pr_debug("Searching variables in %s\n", @@ -717,26 +717,30 @@ static int convert_to_trace_point(Dwarf_Die *sp_die, Dwarf_Addr paddr, return 0; } -/* Call probe_finder callback with real subprogram DIE */ -static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) +/* Call probe_finder callback with scope DIE */ +static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) { - Dwarf_Die die_mem; Dwarf_Attribute fb_attr; size_t nops; int ret; - /* If no real subprogram, find a real one */ - if (!sp_die || dwarf_tag(sp_die) != DW_TAG_subprogram) { - sp_die = die_find_realfunc(&pf->cu_die, pf->addr, &die_mem); - if (!sp_die) { + if (!sc_die) { + pr_err("Caller must pass a scope DIE. Program error.\n"); + return -EINVAL; + } + + /* If not a real subprogram, find a real one */ + if (dwarf_tag(sc_die) != DW_TAG_subprogram) { + if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { pr_warning("Failed to find probe point in any " "functions.\n"); return -ENOENT; } - } + } else + memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die)); - /* Get the frame base attribute/ops */ - dwarf_attr(sp_die, DW_AT_frame_base, &fb_attr); + /* Get the frame base attribute/ops from subprogram */ + dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr); ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; @@ -754,7 +758,7 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) } /* Call finder's callback handler */ - ret = pf->callback(sp_die, pf); + ret = pf->callback(sc_die, pf); /* *pf->fb_ops will be cached in libdw. Don't free it. */ pf->fb_ops = NULL; @@ -762,17 +766,82 @@ static int call_probe_finder(Dwarf_Die *sp_die, struct probe_finder *pf) return ret; } +struct find_scope_param { + const char *function; + const char *file; + int line; + int diff; + Dwarf_Die *die_mem; + bool found; +}; + +static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) +{ + struct find_scope_param *fsp = data; + const char *file; + int lno; + + /* Skip if declared file name does not match */ + if (fsp->file) { + file = dwarf_decl_file(fn_die); + if (!file || strcmp(fsp->file, file) != 0) + return 0; + } + /* If the function name is given, that's what user expects */ + if (fsp->function) { + if (die_compare_name(fn_die, fsp->function)) { + memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); + fsp->found = true; + return 1; + } + } else { + /* With the line number, find the nearest declared DIE */ + dwarf_decl_line(fn_die, &lno); + if (lno < fsp->line && fsp->diff > fsp->line - lno) { + /* Keep a candidate and continue */ + fsp->diff = fsp->line - lno; + memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); + fsp->found = true; + } + } + return 0; +} + +/* Find an appropriate scope fits to given conditions */ +static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem) +{ + struct find_scope_param fsp = { + .function = pf->pev->point.function, + .file = pf->fname, + .line = pf->lno, + .diff = INT_MAX, + .die_mem = die_mem, + .found = false, + }; + + cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb, &fsp); + + return fsp.found ? die_mem : NULL; +} + static int probe_point_line_walker(const char *fname, int lineno, Dwarf_Addr addr, void *data) { struct probe_finder *pf = data; + Dwarf_Die *sc_die, die_mem; int ret; if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0) return 0; pf->addr = addr; - ret = call_probe_finder(NULL, pf); + sc_die = find_best_scope(pf, &die_mem); + if (!sc_die) { + pr_warning("Failed to find scope of probe point.\n"); + return -ENOENT; + } + + ret = call_probe_finder(sc_die, pf); /* Continue if no error, because the line will be in inline function */ return ret < 0 ? ret : 0; @@ -826,6 +895,7 @@ static int probe_point_lazy_walker(const char *fname, int lineno, Dwarf_Addr addr, void *data) { struct probe_finder *pf = data; + Dwarf_Die *sc_die, die_mem; int ret; if (!line_list__has_line(&pf->lcache, lineno) || @@ -835,7 +905,14 @@ static int probe_point_lazy_walker(const char *fname, int lineno, pr_debug("Probe line found: line:%d addr:0x%llx\n", lineno, (unsigned long long)addr); pf->addr = addr; - ret = call_probe_finder(NULL, pf); + pf->lno = lineno; + sc_die = find_best_scope(pf, &die_mem); + if (!sc_die) { + pr_warning("Failed to find scope of probe point.\n"); + return -ENOENT; + } + + ret = call_probe_finder(sc_die, pf); /* * Continue if no error, because the lazy pattern will match @@ -1059,7 +1136,7 @@ found: } /* Add a found probe point into trace event list */ -static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) +static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) { struct trace_event_finder *tf = container_of(pf, struct trace_event_finder, pf); @@ -1074,8 +1151,9 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) } tev = &tf->tevs[tf->ntevs++]; - ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, - &tev->point); + /* Trace point should be converted from subprogram DIE */ + ret = convert_to_trace_point(&pf->sp_die, pf->addr, + pf->pev->point.retprobe, &tev->point); if (ret < 0) return ret; @@ -1090,7 +1168,8 @@ static int add_probe_trace_event(Dwarf_Die *sp_die, struct probe_finder *pf) for (i = 0; i < pf->pev->nargs; i++) { pf->pvar = &pf->pev->args[i]; pf->tvar = &tev->args[i]; - ret = find_variable(sp_die, pf); + /* Variable should be found from scope DIE */ + ret = find_variable(sc_die, pf); if (ret != 0) return ret; } @@ -1158,7 +1237,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) } /* Add a found vars into available variables list */ -static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) +static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) { struct available_var_finder *af = container_of(pf, struct available_var_finder, pf); @@ -1173,8 +1252,9 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) } vl = &af->vls[af->nvls++]; - ret = convert_to_trace_point(sp_die, pf->addr, pf->pev->point.retprobe, - &vl->point); + /* Trace point should be converted from subprogram DIE */ + ret = convert_to_trace_point(&pf->sp_die, pf->addr, + pf->pev->point.retprobe, &vl->point); if (ret < 0) return ret; @@ -1186,14 +1266,14 @@ static int add_available_vars(Dwarf_Die *sp_die, struct probe_finder *pf) if (vl->vars == NULL) return -ENOMEM; af->child = true; - die_find_child(sp_die, collect_variables_cb, (void *)af, &die_mem); + die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem); /* Find external variables */ if (!af->externs) goto out; /* Don't need to search child DIE for externs. */ af->child = false; - nscopes = dwarf_getscopes_die(sp_die, &scopes); + nscopes = dwarf_getscopes_die(sc_die, &scopes); while (nscopes-- > 1) die_find_child(&scopes[nscopes], collect_variables_cb, (void *)af, &die_mem); diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index c478b42a2473..1132c8f0ce89 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -57,7 +57,7 @@ struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ /* Callback when a probe point is found */ - int (*callback)(Dwarf_Die *sp_die, struct probe_finder *pf); + int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf); /* For function searching */ int lno; /* Line number */ -- cgit v1.2.3 From f182e3e13ca71b64b40fab1aef31fa6a78271648 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:03:05 +0900 Subject: perf probe: Avoid searching variables in intermediate scopes Fix variable searching logic to search one in inner than local scope or global(CU) scope. In the other words, skip searching in intermediate scopes. e.g., in the following code, int var1; void inline infunc(int i) { i++; <--- [A] } void func(void) { int var1, var2; infunc(var2); } At [A], "var1" should point the global variable "var1", however, if user mis-typed as "var2", variable search should be failed. However, current logic searches variable infunc() scope, global scope, and then func() scope. Thus, it can find "var2" variable in func() scope. This may not be what user expects. So, it would better not search outer scopes except outermost (compile unit) scope which contains only global variables, when it failed to find given variable in local scope. E.g. Without this: $ perf probe -V pre_schedule --externs > without.vars With this: $ perf probe -V pre_schedule --externs > with.vars Check the diff: $ diff without.vars with.vars 88d87 < int cpu 133d131 < long unsigned int* switch_count These vars are actually in the scope of schedule(), the caller of pre_schedule(). Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110305.19900.94374.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-finder.c | 44 +++++++++++++----------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 5c83b7d3d8ef..114542a5a99c 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -615,9 +615,9 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) /* Find a variable in a scope DIE */ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) { - Dwarf_Die vr_die, *scopes; + Dwarf_Die vr_die; char buf[32], *ptr; - int ret, nscopes; + int ret = 0; if (!is_c_varname(pf->pvar->var)) { /* Copy raw parameters */ @@ -652,29 +652,16 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) if (pf->tvar->name == NULL) return -ENOMEM; - pr_debug("Searching '%s' variable in context.\n", - pf->pvar->var); + pr_debug("Searching '%s' variable in context.\n", pf->pvar->var); /* Search child die for local variables and parameters. */ - if (die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) - ret = convert_variable(&vr_die, pf); - else { - /* Search upper class */ - nscopes = dwarf_getscopes_die(sc_die, &scopes); - ret = -ENOENT; - while (nscopes-- > 1) { - pr_debug("Searching variables in %s\n", - dwarf_diename(&scopes[nscopes])); - /* We should check this scope, so give dummy address */ - if (die_find_variable_at(&scopes[nscopes], - pf->pvar->var, 0, - &vr_die)) { - ret = convert_variable(&vr_die, pf); - break; - } - } - if (scopes) - free(scopes); + if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { + /* Search again in global variables */ + if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + ret = -ENOENT; } + if (ret == 0) + ret = convert_variable(&vr_die, pf); + if (ret < 0) pr_warning("Failed to find '%s' in this function.\n", pf->pvar->var); @@ -1242,8 +1229,8 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) struct available_var_finder *af = container_of(pf, struct available_var_finder, pf); struct variable_list *vl; - Dwarf_Die die_mem, *scopes = NULL; - int ret, nscopes; + Dwarf_Die die_mem; + int ret; /* Check number of tevs */ if (af->nvls == af->max_vls) { @@ -1273,12 +1260,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) goto out; /* Don't need to search child DIE for externs. */ af->child = false; - nscopes = dwarf_getscopes_die(sc_die, &scopes); - while (nscopes-- > 1) - die_find_child(&scopes[nscopes], collect_variables_cb, - (void *)af, &die_mem); - if (scopes) - free(scopes); + die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem); out: if (strlist__empty(vl->vars)) { -- cgit v1.2.3 From db0d2c6420eeb8fd669bac84d72f1ab828bbaa64 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:03:11 +0900 Subject: perf probe: Search concrete out-of-line instances gcc 4.6 generates a concrete out-of-line instance when there is a function which is implicitly inlined somewhere but also has its own instance. The concrete out-of-line instance means that it has an abstract origin of the function which is referred by not only inlined-subroutines but also a concrete subprogram. Since current dwarf_func_inline_instances() can find only instances of inlined-subroutines, this introduces new die_walk_instances() to find both of subprogram and inlined-subroutines. e.g. without this, Available variables at sched_group_rt_period @ struct task_group* tg perf probe failed to find actual subprogram instance of sched_group_rt_period(). With this, Available variables at sched_group_rt_period @ struct task_group* tg @ struct task_group* tg Now it found the sched_group_rt_period() itself. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110311.19900.63997.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 58 ++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/dwarf-aux.h | 4 +++ tools/perf/util/probe-finder.c | 56 +++++++++++++++------------------------- 3 files changed, 83 insertions(+), 35 deletions(-) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 425703a58638..d0f4048b634f 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -453,6 +453,64 @@ Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, return die_mem; } +struct __instance_walk_param { + void *addr; + int (*callback)(Dwarf_Die *, void *); + void *data; + int retval; +}; + +static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) +{ + struct __instance_walk_param *iwp = data; + Dwarf_Attribute attr_mem; + Dwarf_Die origin_mem; + Dwarf_Attribute *attr; + Dwarf_Die *origin; + + attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem); + if (attr == NULL) + return DIE_FIND_CB_CONTINUE; + + origin = dwarf_formref_die(attr, &origin_mem); + if (origin == NULL || origin->addr != iwp->addr) + return DIE_FIND_CB_CONTINUE; + + iwp->retval = iwp->callback(inst, iwp->data); + + return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE; +} + +/** + * die_walk_instances - Walk on instances of given DIE + * @or_die: an abstract original DIE + * @callback: a callback function which is called with instance DIE + * @data: user data + * + * Walk on the instances of give @in_die. @in_die must be an inlined function + * declartion. This returns the return value of @callback if it returns + * non-zero value, or -ENOENT if there is no instance. + */ +int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *), + void *data) +{ + Dwarf_Die cu_die; + Dwarf_Die die_mem; + struct __instance_walk_param iwp = { + .addr = or_die->addr, + .callback = callback, + .data = data, + .retval = -ENOENT, + }; + + if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL) + return -ENOENT; + + die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem); + + return iwp.retval; +} + /* Line walker internal parameters */ struct __line_walk_param { bool recursive; diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 6f46106fd1d5..6ce1717784b7 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -80,6 +80,10 @@ extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_mem); +/* Walk on the instances of given DIE */ +extern int die_walk_instances(Dwarf_Die *in_die, + int (*callback)(Dwarf_Die *, void *), void *data); + /* Walker on lines (Note: line number will not be sorted) */ typedef int (* line_walk_callback_t) (const char *fname, int lineno, Dwarf_Addr addr, void *data); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 114542a5a99c..555fc3864b90 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -924,42 +924,39 @@ static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) return die_walk_lines(sp_die, probe_point_lazy_walker, pf); } -/* Callback parameter with return value */ -struct dwarf_callback_param { - void *data; - int retval; -}; - static int probe_point_inline_cb(Dwarf_Die *in_die, void *data) { - struct dwarf_callback_param *param = data; - struct probe_finder *pf = param->data; + struct probe_finder *pf = data; struct perf_probe_point *pp = &pf->pev->point; Dwarf_Addr addr; + int ret; if (pp->lazy_line) - param->retval = find_probe_point_lazy(in_die, pf); + ret = find_probe_point_lazy(in_die, pf); else { /* Get probe address */ if (dwarf_entrypc(in_die, &addr) != 0) { pr_warning("Failed to get entry address of %s.\n", dwarf_diename(in_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; + return -ENOENT; } pf->addr = addr; pf->addr += pp->offset; pr_debug("found inline addr: 0x%jx\n", (uintmax_t)pf->addr); - param->retval = call_probe_finder(in_die, pf); - if (param->retval < 0) - return DWARF_CB_ABORT; + ret = call_probe_finder(in_die, pf); } - return DWARF_CB_OK; + return ret; } +/* Callback parameter with return value for libdw */ +struct dwarf_callback_param { + void *data; + int retval; +}; + /* Search function from function name */ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) { @@ -996,14 +993,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); } - } else { - struct dwarf_callback_param _param = {.data = (void *)pf, - .retval = 0}; + } else /* Inlined function: search instances */ - dwarf_func_inline_instances(sp_die, probe_point_inline_cb, - &_param); - param->retval = _param.retval; - } + param->retval = die_walk_instances(sp_die, + probe_point_inline_cb, (void *)pf); return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ } @@ -1452,16 +1445,14 @@ static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf) static int line_range_inline_cb(Dwarf_Die *in_die, void *data) { - struct dwarf_callback_param *param = data; - - param->retval = find_line_range_by_line(in_die, param->data); + find_line_range_by_line(in_die, data); /* * We have to check all instances of inlined function, because * some execution paths can be optimized out depends on the * function argument of instances */ - return DWARF_CB_OK; + return 0; } /* Search function from function name */ @@ -1489,15 +1480,10 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) { - struct dwarf_callback_param _param; - _param.data = (void *)lf; - _param.retval = 0; - dwarf_func_inline_instances(sp_die, - line_range_inline_cb, - &_param); - param->retval = _param.retval; - } else + if (dwarf_func_inline(sp_die)) + param->retval = die_walk_instances(sp_die, + line_range_inline_cb, lf); + else param->retval = find_line_range_by_line(sp_die, lf); return DWARF_CB_ABORT; } -- cgit v1.2.3 From 3f4460a28fb2f73df6c32c3a305797abc01c0f9c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 11 Aug 2011 20:03:18 +0900 Subject: perf probe: Filter out redundant inline-instances With gcc4.6, some instances of concrete inlined function looks redundant and broken, because it appears inside of a concrete instance and its call_file and call_line are same as the original abstruct's decl_file and decl_line respectively. e.g. [ d1aa] subprogram external (flag) Yes name (strp) "add_timer" decl_file (data1) 2 ;here is original decl_line (data2) 847 ;line and file prototyped (flag) Yes inline (data1) inlined (1) sibling (ref4) [ d1c6] ... [ 11d84] subprogram abstract_origin (ref4) [ d1aa] ; concrete instance low_pc (addr) .text+0x000000000000246f high_pc (addr) .text+0x000000000000248b frame_base (block1) [ 0] call_frame_cfa sibling (ref4) [ 11dd9] [ 11d9f] formal_parameter abstract_origin (ref4) [ d1b9] location (data4) location list [ 701b] [ 11da8] inlined_subroutine abstract_origin (ref4) [ d1aa] ; redundant instance low_pc (addr) .text+0x000000000000247e high_pc (addr) .text+0x0000000000002480 call_file (data1) 2 ; call line and file call_line (data2) 847 ; are same as above Those redundant instances leads unwilling results; e.g. find probe points inside of functions even if we specify a function entry as below; $ perf probe -V add_timer Available variables at add_timer @ struct timer_list* timer @ (No matched variables) So, this filters out those redundant instances based on call-site and decl-site information. Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: yrl.pp-manager.tt@hitachi.com Link: http://lkml.kernel.org/r/20110811110317.19900.59525.stgit@fedora15 Signed-off-by: Masami Hiramatsu Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dwarf-aux.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index d0f4048b634f..ee51e9b4dc09 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -307,6 +307,17 @@ static int die_get_call_fileno(Dwarf_Die *in_die) return -ENOENT; } +/* Get the declared file index number in CU DIE */ +static int die_get_decl_fileno(Dwarf_Die *pdie) +{ + Dwarf_Sword idx; + + if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +} + /** * die_get_call_file - Get callsite file name of inlined function instance * @in_die: a DIE of an inlined function instance @@ -467,6 +478,7 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) Dwarf_Die origin_mem; Dwarf_Attribute *attr; Dwarf_Die *origin; + int tmp; attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem); if (attr == NULL) @@ -476,6 +488,16 @@ static int __die_walk_instances_cb(Dwarf_Die *inst, void *data) if (origin == NULL || origin->addr != iwp->addr) return DIE_FIND_CB_CONTINUE; + /* Ignore redundant instances */ + if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) { + dwarf_decl_line(origin, &tmp); + if (die_get_call_lineno(inst) == tmp) { + tmp = die_get_decl_fileno(origin); + if (die_get_call_fileno(inst) == tmp) + return DIE_FIND_CB_CONTINUE; + } + } + iwp->retval = iwp->callback(inst, iwp->data); return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE; -- cgit v1.2.3 From 9c1176b6a28850703ea6e3a0f0c703f6d6c61cd3 Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 11 Aug 2011 00:06:04 +0200 Subject: firewire: cdev: fix 32 bit userland on 64 bit kernel compat corner cases Clemens points out that we need to use compat_ptr() in order to safely cast from u64 to addresses of a 32-bit usermode client. Before, our conversion went wrong - in practice if the client cast from pointer to integer such that sign-extension happened, (libraw1394 and libdc1394 at least were not doing that, IOW were not affected) or - in theory on s390 (which doesn't have FireWire though) and on the tile architecture, regardless of what the client does. The bug would usually be observed as the initial get_info ioctl failing with "Bad address" (EFAULT). Reported-by: Carl Karsten Reported-by: Clemens Ladisch Signed-off-by: Stefan Richter --- drivers/firewire/core-cdev.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index e6ad3bb6c1a6..4799393247c8 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -216,15 +216,33 @@ struct inbound_phy_packet_event { struct fw_cdev_event_phy_packet phy_packet; }; -static inline void __user *u64_to_uptr(__u64 value) +#ifdef CONFIG_COMPAT +static void __user *u64_to_uptr(u64 value) +{ + if (is_compat_task()) + return compat_ptr(value); + else + return (void __user *)(unsigned long)value; +} + +static u64 uptr_to_u64(void __user *ptr) +{ + if (is_compat_task()) + return ptr_to_compat(ptr); + else + return (u64)(unsigned long)ptr; +} +#else +static inline void __user *u64_to_uptr(u64 value) { return (void __user *)(unsigned long)value; } -static inline __u64 uptr_to_u64(void __user *ptr) +static inline u64 uptr_to_u64(void __user *ptr) { - return (__u64)(unsigned long)ptr; + return (u64)(unsigned long)ptr; } +#endif /* CONFIG_COMPAT */ static int fw_device_op_open(struct inode *inode, struct file *file) { -- cgit v1.2.3 From a01e836087881dd9d824417190994c9b2b0f1dbb Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 11 Aug 2011 20:40:42 +0200 Subject: firewire: ohci: fix DMA unmapping in an error path If request_irq failed, we would pass wrong arguments to dma_free_coherent. https://bugzilla.redhat.com/show_bug.cgi?id=728185 Reported-by: Mads Kiilerich Signed-off-by: Stefan Richter --- drivers/firewire/ohci.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 4f6d72f87f6f..ded0c9bf96f4 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -2178,8 +2178,13 @@ static int ohci_enable(struct fw_card *card, ohci_driver_name, ohci)) { fw_error("Failed to allocate interrupt %d.\n", dev->irq); pci_disable_msi(dev); - dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, - ohci->config_rom, ohci->config_rom_bus); + + if (config_rom) { + dma_free_coherent(ohci->card.device, CONFIG_ROM_SIZE, + ohci->next_config_rom, + ohci->next_config_rom_bus); + ohci->next_config_rom = NULL; + } return -EIO; } -- cgit v1.2.3 From 0ac8e58f3818795d02ac309bd57b4d93ec283a77 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 9 Aug 2011 12:24:17 +0100 Subject: ARM: fix perf build with uclibc toolchains libio.h is not provided by uClibc, in order to be able to test the definition of __UCLIBC__ we need to include stdlib.h, which also includes stddef.h, providing the definition of 'NULL'. Signed-off-by: Florian Fainelli Signed-off-by: Will Deacon --- tools/perf/arch/arm/util/dwarf-regs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c index fff6450c8c99..e8d5c551c69c 100644 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ b/tools/perf/arch/arm/util/dwarf-regs.c @@ -8,7 +8,10 @@ * published by the Free Software Foundation. */ +#include +#ifndef __UCLIBC__ #include +#endif #include struct pt_regs_dwarfnum { -- cgit v1.2.3 From 49bef8331afefa4dd75f7124c50bde47168f5492 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 10 Aug 2011 10:20:17 +0100 Subject: ARM: perf: fix prototype of release_pmu Commit f12482c9 ("ARM: 6974/1: pmu: refactor reservation") changed the prototype of release_pmu, but missed the stub for when CONFIG_CPU_HAS_PMU is not selected by the platform. This patch changes the prototype of the stub, preventing possible build failures when CONFIG_CPU_HAS_PMU is not selected. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 67c70a31a1be..8ae32ba092c2 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -75,7 +75,7 @@ reserve_pmu(enum arm_pmu_type device) } static inline int -release_pmu(struct platform_device *pdev) +release_pmu(enum arm_pmu_type device) { return -ENODEV; } -- cgit v1.2.3 From 7fdd3c49629e8aab48dbd1b2f800854b0f93cba0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 12 Aug 2011 10:42:48 +0100 Subject: ARM: perf: make name of arm_pmu_type consistent Commit f12482c9 ("ARM: 6974/1: pmu: refactor reservation") changed {release,reserve}_pmu to take an enum arm_pmu_type as a parameter, but inconsistently named the parameter `type' or `device'. It would be nice if these were consistent. This patch makes use of enum arm_pmu_type consistent, always using `type'. Related printks are updated, explicitly mentioning `type' also. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm/include/asm/pmu.h | 10 +++++----- arch/arm/kernel/pmu.c | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/arm/include/asm/pmu.h b/arch/arm/include/asm/pmu.h index 8ae32ba092c2..b7e82c4aced6 100644 --- a/arch/arm/include/asm/pmu.h +++ b/arch/arm/include/asm/pmu.h @@ -41,7 +41,7 @@ struct arm_pmu_platdata { * encoded error on failure. */ extern struct platform_device * -reserve_pmu(enum arm_pmu_type device); +reserve_pmu(enum arm_pmu_type type); /** * release_pmu() - Relinquish control of the performance counters @@ -62,26 +62,26 @@ release_pmu(enum arm_pmu_type type); * the actual hardware initialisation. */ extern int -init_pmu(enum arm_pmu_type device); +init_pmu(enum arm_pmu_type type); #else /* CONFIG_CPU_HAS_PMU */ #include static inline struct platform_device * -reserve_pmu(enum arm_pmu_type device) +reserve_pmu(enum arm_pmu_type type) { return ERR_PTR(-ENODEV); } static inline int -release_pmu(enum arm_pmu_type device) +release_pmu(enum arm_pmu_type type) { return -ENODEV; } static inline int -init_pmu(enum arm_pmu_type device) +init_pmu(enum arm_pmu_type type) { return -ENODEV; } diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c index 2b70709376c3..c53474fe84df 100644 --- a/arch/arm/kernel/pmu.c +++ b/arch/arm/kernel/pmu.c @@ -31,7 +31,7 @@ static int __devinit pmu_register(struct platform_device *pdev, { if (type < 0 || type >= ARM_NUM_PMU_DEVICES) { pr_warning("received registration request for unknown " - "device %d\n", type); + "PMU device type %d\n", type); return -EINVAL; } @@ -112,17 +112,17 @@ static int __init register_pmu_driver(void) device_initcall(register_pmu_driver); struct platform_device * -reserve_pmu(enum arm_pmu_type device) +reserve_pmu(enum arm_pmu_type type) { struct platform_device *pdev; - if (test_and_set_bit_lock(device, &pmu_lock)) { + if (test_and_set_bit_lock(type, &pmu_lock)) { pdev = ERR_PTR(-EBUSY); - } else if (pmu_devices[device] == NULL) { - clear_bit_unlock(device, &pmu_lock); + } else if (pmu_devices[type] == NULL) { + clear_bit_unlock(type, &pmu_lock); pdev = ERR_PTR(-ENODEV); } else { - pdev = pmu_devices[device]; + pdev = pmu_devices[type]; } return pdev; @@ -130,11 +130,11 @@ reserve_pmu(enum arm_pmu_type device) EXPORT_SYMBOL_GPL(reserve_pmu); int -release_pmu(enum arm_pmu_type device) +release_pmu(enum arm_pmu_type type) { - if (WARN_ON(!pmu_devices[device])) + if (WARN_ON(!pmu_devices[type])) return -EINVAL; - clear_bit_unlock(device, &pmu_lock); + clear_bit_unlock(type, &pmu_lock); return 0; } EXPORT_SYMBOL_GPL(release_pmu); @@ -182,17 +182,17 @@ init_cpu_pmu(void) } int -init_pmu(enum arm_pmu_type device) +init_pmu(enum arm_pmu_type type) { int err = 0; - switch (device) { + switch (type) { case ARM_PMU_DEVICE_CPU: err = init_cpu_pmu(); break; default: - pr_warning("attempt to initialise unknown device %d\n", - device); + pr_warning("attempt to initialise PMU of unknown " + "type %d\n", type); err = -EINVAL; } -- cgit v1.2.3 From 5cb843ca0f781b62dc9793b26926d0b8efef5576 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jul 2011 11:57:03 +0100 Subject: ARM: realview: ensure visibility of writes during reset The various reset routines in mach-realview rely on an FPGA to power-cycle the board after writing some magic runes to memory-mapped registers. This patch adds a dsb() following the writes, so that they become visible before we mdelay(1000) in the arch_reset code. Without this patch, the timeout would expire sporadically, causing the reset to fail. Signed-off-by: Will Deacon --- arch/arm/mach-realview/include/mach/system.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-realview/include/mach/system.h b/arch/arm/mach-realview/include/mach/system.h index a30f2e3ec178..6657ff231161 100644 --- a/arch/arm/mach-realview/include/mach/system.h +++ b/arch/arm/mach-realview/include/mach/system.h @@ -44,6 +44,7 @@ static inline void arch_reset(char mode, const char *cmd) */ if (realview_reset) realview_reset(mode); + dsb(); } #endif -- cgit v1.2.3 From dfc40b24c0a37593724f3317cd485c73ee878c18 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 20 Jul 2011 14:18:46 +0100 Subject: ARM: twd: register clockevents device before enabling PPI The smp_twd clockevents driver currently enables the local timer PPI before the clockevents device is registered. This can lead to a kernel panic if a spurious timer interrupt is generated before registration has completed since the kernel will treat it as an IPI timer. This patch moves the clockevents device registration before the IRQ unmasking so that we can always handle timer interrupts once they can occur. Acked-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm/kernel/smp_twd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 2c277d40cee6..01c186222f3b 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -137,8 +137,8 @@ void __cpuinit twd_timer_setup(struct clock_event_device *clk) clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clockevents_register_device(clk); + /* Make sure our local interrupt controller has this enabled */ gic_enable_ppi(clk->irq); - - clockevents_register_device(clk); } -- cgit v1.2.3 From 72dc53acd50db066a5a5ebe1f39fae73d7e62aa8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 3 Aug 2011 12:37:04 +0100 Subject: ARM: cache: detect VIPT aliasing I-cache on ARMv6 The current cache detection code does not check for an aliasing I-cache if the D-cache is found to be VIPT aliasing. This patch fixes the problem by always checking for an aliasing I-cache on v6 and later. Signed-off-by: Will Deacon --- arch/arm/kernel/setup.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 70bca649e925..e514c76043b4 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -280,18 +280,19 @@ static void __init cacheid_init(void) if (arch >= CPU_ARCH_ARMv6) { if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ + arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; if ((cachetype & (3 << 14)) == 1 << 14) cacheid |= CACHEID_ASID_TAGGED; - else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7)) - cacheid |= CACHEID_VIPT_I_ALIASING; - } else if (cachetype & (1 << 23)) { - cacheid = CACHEID_VIPT_ALIASING; } else { - cacheid = CACHEID_VIPT_NONALIASING; - if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6)) - cacheid |= CACHEID_VIPT_I_ALIASING; + arch = CPU_ARCH_ARMv6; + if (cachetype & (1 << 23)) + cacheid = CACHEID_VIPT_ALIASING; + else + cacheid = CACHEID_VIPT_NONALIASING; } + if (cpu_has_aliasing_icache(arch)) + cacheid |= CACHEID_VIPT_I_ALIASING; } else { cacheid = CACHEID_VIVT; } -- cgit v1.2.3 From f6b864a9071e21186476910613ec9913b56067a2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 12 Aug 2011 18:22:10 +0200 Subject: ASoC: Fix compile warning in wm8750.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sound/soc/codecs/wm8750.c:784:2: warning: missing braces around initializer sound/soc/codecs/wm8750.c:784:2: warning: (near initialization for ‘wm8750_spi_ids[2].name’) It's because struct spi_device_id.name is a char array, not a pointer, while the driver initializes explicitly with 0. Signed-off-by: Takashi Iwai --- sound/soc/codecs/wm8750.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8750.c b/sound/soc/codecs/wm8750.c index 82ac5fcaa2b2..d0003cc3bcd6 100644 --- a/sound/soc/codecs/wm8750.c +++ b/sound/soc/codecs/wm8750.c @@ -781,7 +781,7 @@ static int __devexit wm8750_spi_remove(struct spi_device *spi) static const struct spi_device_id wm8750_spi_ids[] = { { "wm8750", 0 }, { "wm8987", 0 }, - { 0, 0 }, + { }, }; MODULE_DEVICE_TABLE(spi, wm8750_spi_ids); -- cgit v1.2.3 From f8afdf481f0fef5e170c6c928cec42879d505654 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 12 Aug 2011 13:31:30 -0400 Subject: drivers/net/wireless/wl12xx: add missing kfree In each case, the freed data should be freed in the error handling code as well. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/acx.c | 6 +----- drivers/net/wireless/wl12xx/testmode.c | 5 ++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/wireless/wl12xx/acx.c b/drivers/net/wireless/wl12xx/acx.c index 7e33f1f4f3d4..34f6ab53e519 100644 --- a/drivers/net/wireless/wl12xx/acx.c +++ b/drivers/net/wireless/wl12xx/acx.c @@ -77,8 +77,6 @@ int wl1271_acx_sleep_auth(struct wl1271 *wl, u8 sleep_auth) auth->sleep_auth = sleep_auth; ret = wl1271_cmd_configure(wl, ACX_SLEEP_AUTH, auth, sizeof(*auth)); - if (ret < 0) - return ret; out: kfree(auth); @@ -624,10 +622,8 @@ int wl1271_acx_cca_threshold(struct wl1271 *wl) ret = wl1271_cmd_configure(wl, ACX_CCA_THRESHOLD, detection, sizeof(*detection)); - if (ret < 0) { + if (ret < 0) wl1271_warning("failed to set cca threshold: %d", ret); - return ret; - } out: kfree(detection); diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c index 5d5e1ef87206..88add68bd9ac 100644 --- a/drivers/net/wireless/wl12xx/testmode.c +++ b/drivers/net/wireless/wl12xx/testmode.c @@ -139,12 +139,15 @@ static int wl1271_tm_cmd_interrogate(struct wl1271 *wl, struct nlattr *tb[]) if (ret < 0) { wl1271_warning("testmode cmd interrogate failed: %d", ret); + kfree(cmd); return ret; } skb = cfg80211_testmode_alloc_reply_skb(wl->hw->wiphy, sizeof(*cmd)); - if (!skb) + if (!skb) { + kfree(cmd); return -ENOMEM; + } NLA_PUT(skb, WL1271_TM_ATTR_DATA, sizeof(*cmd), cmd); -- cgit v1.2.3 From 06f8e2d6754dc631732415b741b5aa58a0f7133f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 12 Aug 2011 13:57:55 -0500 Subject: xfs: don't expect xfs headers to be in subdirectories Fix up some #include directives in preparation for moving a few header files out of xfs source subdirectories. Note that "xfs_linux.h" also got its quoting convention for included files switched. Signed-off-by: Alex Elder Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 2 +- fs/xfs/linux-2.6/xfs_linux.h | 27 +++++++++++++-------------- fs/xfs/linux-2.6/xfs_quotaops.c | 2 +- fs/xfs/linux-2.6/xfs_trace.c | 4 ++-- fs/xfs/xfs.h | 3 ++- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 75bb316529dd..b100cf445880 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,7 +16,7 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -ccflags-y := -I$(src) -I$(src)/linux-2.6 +ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support ccflags-$(CONFIG_XFS_DEBUG) += -g XFS_LINUX := linux-2.6 diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index d42f814e4d35..1e8a45e74c3e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -32,13 +32,12 @@ # define XFS_BIG_INUMS 0 #endif -#include +#include "xfs_types.h" -#include -#include -#include - -#include +#include "kmem.h" +#include "mrlock.h" +#include "time.h" +#include "uuid.h" #include #include @@ -78,14 +77,14 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include +#include "xfs_vnode.h" +#include "xfs_stats.h" +#include "xfs_sysctl.h" +#include "xfs_iops.h" +#include "xfs_aops.h" +#include "xfs_super.h" +#include "xfs_buf.h" +#include "xfs_message.h" #ifdef __BIG_ENDIAN #define XFS_NATIVE_HOST 1 diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c index 29b9d642e93d..7e76f537abb7 100644 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ b/fs/xfs/linux-2.6/xfs_quotaops.c @@ -25,7 +25,7 @@ #include "xfs_trans.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "quota/xfs_qm.h" +#include "xfs_qm.h" #include diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c index 88d25d4aa56e..9010ce885e6a 100644 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ b/fs/xfs/linux-2.6/xfs_trace.c @@ -43,8 +43,8 @@ #include "xfs_quota.h" #include "xfs_iomap.h" #include "xfs_aops.h" -#include "quota/xfs_dquot_item.h" -#include "quota/xfs_dquot.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" #include "xfs_log_recover.h" #include "xfs_inode_item.h" diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h index 53ec3ea9a625..d8b11b7f94aa 100644 --- a/fs/xfs/xfs.h +++ b/fs/xfs/xfs.h @@ -24,5 +24,6 @@ #define XFS_BUF_LOCK_TRACKING 1 #endif -#include +#include "xfs_linux.h" + #endif /* __XFS_H__ */ -- cgit v1.2.3 From c59d87c460767bc35dafd490139d3cfe78fb8da4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Aug 2011 16:21:35 -0500 Subject: xfs: remove subdirectories Use the move from Linux 2.6 to Linux 3.x as an excuse to kill the annoying subdirectories in the XFS source code. Besides the large amount of file rename the only changes are to the Makefile, a few files including headers with the subdirectory prefix, and the binary sysctl compat code that includes a header under fs/xfs/ from kernel/. Signed-off-by: Christoph Hellwig Signed-off-by: Alex Elder --- fs/xfs/Makefile | 117 +- fs/xfs/kmem.c | 132 +++ fs/xfs/kmem.h | 124 ++ fs/xfs/linux-2.6/kmem.c | 132 --- fs/xfs/linux-2.6/kmem.h | 124 -- fs/xfs/linux-2.6/mrlock.h | 90 -- fs/xfs/linux-2.6/time.h | 36 - fs/xfs/linux-2.6/xfs_acl.c | 420 ------- fs/xfs/linux-2.6/xfs_aops.c | 1499 ------------------------ fs/xfs/linux-2.6/xfs_aops.h | 68 -- fs/xfs/linux-2.6/xfs_buf.c | 1876 ------------------------------ fs/xfs/linux-2.6/xfs_buf.h | 326 ------ fs/xfs/linux-2.6/xfs_discard.c | 222 ---- fs/xfs/linux-2.6/xfs_discard.h | 10 - fs/xfs/linux-2.6/xfs_export.c | 250 ---- fs/xfs/linux-2.6/xfs_export.h | 72 -- fs/xfs/linux-2.6/xfs_file.c | 1096 ------------------ fs/xfs/linux-2.6/xfs_fs_subr.c | 96 -- fs/xfs/linux-2.6/xfs_globals.c | 43 - fs/xfs/linux-2.6/xfs_ioctl.c | 1556 ------------------------- fs/xfs/linux-2.6/xfs_ioctl.h | 85 -- fs/xfs/linux-2.6/xfs_ioctl32.c | 672 ----------- fs/xfs/linux-2.6/xfs_ioctl32.h | 237 ---- fs/xfs/linux-2.6/xfs_iops.c | 1210 -------------------- fs/xfs/linux-2.6/xfs_iops.h | 30 - fs/xfs/linux-2.6/xfs_linux.h | 309 ----- fs/xfs/linux-2.6/xfs_message.c | 108 -- fs/xfs/linux-2.6/xfs_message.h | 39 - fs/xfs/linux-2.6/xfs_quotaops.c | 139 --- fs/xfs/linux-2.6/xfs_stats.c | 122 -- fs/xfs/linux-2.6/xfs_stats.h | 223 ---- fs/xfs/linux-2.6/xfs_super.c | 1773 ---------------------------- fs/xfs/linux-2.6/xfs_super.h | 87 -- fs/xfs/linux-2.6/xfs_sync.c | 1065 ----------------- fs/xfs/linux-2.6/xfs_sync.h | 51 - fs/xfs/linux-2.6/xfs_sysctl.c | 252 ---- fs/xfs/linux-2.6/xfs_sysctl.h | 102 -- fs/xfs/linux-2.6/xfs_trace.c | 56 - fs/xfs/linux-2.6/xfs_trace.h | 1746 ---------------------------- fs/xfs/linux-2.6/xfs_vnode.h | 64 -- fs/xfs/linux-2.6/xfs_xattr.c | 241 ---- fs/xfs/mrlock.h | 90 ++ fs/xfs/quota/xfs_dquot.c | 1454 ----------------------- fs/xfs/quota/xfs_dquot.h | 137 --- fs/xfs/quota/xfs_dquot_item.c | 529 --------- fs/xfs/quota/xfs_dquot_item.h | 48 - fs/xfs/quota/xfs_qm.c | 2416 --------------------------------------- fs/xfs/quota/xfs_qm.h | 166 --- fs/xfs/quota/xfs_qm_bhv.c | 176 --- fs/xfs/quota/xfs_qm_stats.c | 105 -- fs/xfs/quota/xfs_qm_stats.h | 53 - fs/xfs/quota/xfs_qm_syscalls.c | 906 --------------- fs/xfs/quota/xfs_quota_priv.h | 53 - fs/xfs/quota/xfs_trans_dquot.c | 890 -------------- fs/xfs/support/uuid.c | 63 - fs/xfs/support/uuid.h | 29 - fs/xfs/time.h | 36 + fs/xfs/uuid.c | 63 + fs/xfs/uuid.h | 29 + fs/xfs/xfs_acl.c | 420 +++++++ fs/xfs/xfs_aops.c | 1499 ++++++++++++++++++++++++ fs/xfs/xfs_aops.h | 68 ++ fs/xfs/xfs_buf.c | 1876 ++++++++++++++++++++++++++++++ fs/xfs/xfs_buf.h | 326 ++++++ fs/xfs/xfs_discard.c | 222 ++++ fs/xfs/xfs_discard.h | 10 + fs/xfs/xfs_dquot.c | 1454 +++++++++++++++++++++++ fs/xfs/xfs_dquot.h | 137 +++ fs/xfs/xfs_dquot_item.c | 529 +++++++++ fs/xfs/xfs_dquot_item.h | 48 + fs/xfs/xfs_export.c | 250 ++++ fs/xfs/xfs_export.h | 72 ++ fs/xfs/xfs_file.c | 1096 ++++++++++++++++++ fs/xfs/xfs_fs_subr.c | 96 ++ fs/xfs/xfs_globals.c | 43 + fs/xfs/xfs_ioctl.c | 1556 +++++++++++++++++++++++++ fs/xfs/xfs_ioctl.h | 85 ++ fs/xfs/xfs_ioctl32.c | 672 +++++++++++ fs/xfs/xfs_ioctl32.h | 237 ++++ fs/xfs/xfs_iops.c | 1210 ++++++++++++++++++++ fs/xfs/xfs_iops.h | 30 + fs/xfs/xfs_linux.h | 309 +++++ fs/xfs/xfs_message.c | 108 ++ fs/xfs/xfs_message.h | 39 + fs/xfs/xfs_qm.c | 2416 +++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_qm.h | 166 +++ fs/xfs/xfs_qm_bhv.c | 176 +++ fs/xfs/xfs_qm_stats.c | 105 ++ fs/xfs/xfs_qm_stats.h | 53 + fs/xfs/xfs_qm_syscalls.c | 906 +++++++++++++++ fs/xfs/xfs_quota_priv.h | 53 + fs/xfs/xfs_quotaops.c | 139 +++ fs/xfs/xfs_stats.c | 122 ++ fs/xfs/xfs_stats.h | 223 ++++ fs/xfs/xfs_super.c | 1773 ++++++++++++++++++++++++++++ fs/xfs/xfs_super.h | 87 ++ fs/xfs/xfs_sync.c | 1065 +++++++++++++++++ fs/xfs/xfs_sync.h | 51 + fs/xfs/xfs_sysctl.c | 252 ++++ fs/xfs/xfs_sysctl.h | 102 ++ fs/xfs/xfs_trace.c | 56 + fs/xfs/xfs_trace.h | 1746 ++++++++++++++++++++++++++++ fs/xfs/xfs_trans_dquot.c | 890 ++++++++++++++ fs/xfs/xfs_vnode.h | 64 ++ fs/xfs/xfs_xattr.c | 241 ++++ kernel/sysctl_binary.c | 2 +- kernel/sysctl_check.c | 2 +- 107 files changed, 23610 insertions(+), 23615 deletions(-) create mode 100644 fs/xfs/kmem.c create mode 100644 fs/xfs/kmem.h delete mode 100644 fs/xfs/linux-2.6/kmem.c delete mode 100644 fs/xfs/linux-2.6/kmem.h delete mode 100644 fs/xfs/linux-2.6/mrlock.h delete mode 100644 fs/xfs/linux-2.6/time.h delete mode 100644 fs/xfs/linux-2.6/xfs_acl.c delete mode 100644 fs/xfs/linux-2.6/xfs_aops.c delete mode 100644 fs/xfs/linux-2.6/xfs_aops.h delete mode 100644 fs/xfs/linux-2.6/xfs_buf.c delete mode 100644 fs/xfs/linux-2.6/xfs_buf.h delete mode 100644 fs/xfs/linux-2.6/xfs_discard.c delete mode 100644 fs/xfs/linux-2.6/xfs_discard.h delete mode 100644 fs/xfs/linux-2.6/xfs_export.c delete mode 100644 fs/xfs/linux-2.6/xfs_export.h delete mode 100644 fs/xfs/linux-2.6/xfs_file.c delete mode 100644 fs/xfs/linux-2.6/xfs_fs_subr.c delete mode 100644 fs/xfs/linux-2.6/xfs_globals.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl.h delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.c delete mode 100644 fs/xfs/linux-2.6/xfs_ioctl32.h delete mode 100644 fs/xfs/linux-2.6/xfs_iops.c delete mode 100644 fs/xfs/linux-2.6/xfs_iops.h delete mode 100644 fs/xfs/linux-2.6/xfs_linux.h delete mode 100644 fs/xfs/linux-2.6/xfs_message.c delete mode 100644 fs/xfs/linux-2.6/xfs_message.h delete mode 100644 fs/xfs/linux-2.6/xfs_quotaops.c delete mode 100644 fs/xfs/linux-2.6/xfs_stats.c delete mode 100644 fs/xfs/linux-2.6/xfs_stats.h delete mode 100644 fs/xfs/linux-2.6/xfs_super.c delete mode 100644 fs/xfs/linux-2.6/xfs_super.h delete mode 100644 fs/xfs/linux-2.6/xfs_sync.c delete mode 100644 fs/xfs/linux-2.6/xfs_sync.h delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.c delete mode 100644 fs/xfs/linux-2.6/xfs_sysctl.h delete mode 100644 fs/xfs/linux-2.6/xfs_trace.c delete mode 100644 fs/xfs/linux-2.6/xfs_trace.h delete mode 100644 fs/xfs/linux-2.6/xfs_vnode.h delete mode 100644 fs/xfs/linux-2.6/xfs_xattr.c create mode 100644 fs/xfs/mrlock.h delete mode 100644 fs/xfs/quota/xfs_dquot.c delete mode 100644 fs/xfs/quota/xfs_dquot.h delete mode 100644 fs/xfs/quota/xfs_dquot_item.c delete mode 100644 fs/xfs/quota/xfs_dquot_item.h delete mode 100644 fs/xfs/quota/xfs_qm.c delete mode 100644 fs/xfs/quota/xfs_qm.h delete mode 100644 fs/xfs/quota/xfs_qm_bhv.c delete mode 100644 fs/xfs/quota/xfs_qm_stats.c delete mode 100644 fs/xfs/quota/xfs_qm_stats.h delete mode 100644 fs/xfs/quota/xfs_qm_syscalls.c delete mode 100644 fs/xfs/quota/xfs_quota_priv.h delete mode 100644 fs/xfs/quota/xfs_trans_dquot.c delete mode 100644 fs/xfs/support/uuid.c delete mode 100644 fs/xfs/support/uuid.h create mode 100644 fs/xfs/time.h create mode 100644 fs/xfs/uuid.c create mode 100644 fs/xfs/uuid.h create mode 100644 fs/xfs/xfs_acl.c create mode 100644 fs/xfs/xfs_aops.c create mode 100644 fs/xfs/xfs_aops.h create mode 100644 fs/xfs/xfs_buf.c create mode 100644 fs/xfs/xfs_buf.h create mode 100644 fs/xfs/xfs_discard.c create mode 100644 fs/xfs/xfs_discard.h create mode 100644 fs/xfs/xfs_dquot.c create mode 100644 fs/xfs/xfs_dquot.h create mode 100644 fs/xfs/xfs_dquot_item.c create mode 100644 fs/xfs/xfs_dquot_item.h create mode 100644 fs/xfs/xfs_export.c create mode 100644 fs/xfs/xfs_export.h create mode 100644 fs/xfs/xfs_file.c create mode 100644 fs/xfs/xfs_fs_subr.c create mode 100644 fs/xfs/xfs_globals.c create mode 100644 fs/xfs/xfs_ioctl.c create mode 100644 fs/xfs/xfs_ioctl.h create mode 100644 fs/xfs/xfs_ioctl32.c create mode 100644 fs/xfs/xfs_ioctl32.h create mode 100644 fs/xfs/xfs_iops.c create mode 100644 fs/xfs/xfs_iops.h create mode 100644 fs/xfs/xfs_linux.h create mode 100644 fs/xfs/xfs_message.c create mode 100644 fs/xfs/xfs_message.h create mode 100644 fs/xfs/xfs_qm.c create mode 100644 fs/xfs/xfs_qm.h create mode 100644 fs/xfs/xfs_qm_bhv.c create mode 100644 fs/xfs/xfs_qm_stats.c create mode 100644 fs/xfs/xfs_qm_stats.h create mode 100644 fs/xfs/xfs_qm_syscalls.c create mode 100644 fs/xfs/xfs_quota_priv.h create mode 100644 fs/xfs/xfs_quotaops.c create mode 100644 fs/xfs/xfs_stats.c create mode 100644 fs/xfs/xfs_stats.h create mode 100644 fs/xfs/xfs_super.c create mode 100644 fs/xfs/xfs_super.h create mode 100644 fs/xfs/xfs_sync.c create mode 100644 fs/xfs/xfs_sync.h create mode 100644 fs/xfs/xfs_sysctl.c create mode 100644 fs/xfs/xfs_sysctl.h create mode 100644 fs/xfs/xfs_trace.c create mode 100644 fs/xfs/xfs_trace.h create mode 100644 fs/xfs/xfs_trans_dquot.c create mode 100644 fs/xfs/xfs_vnode.h create mode 100644 fs/xfs/xfs_xattr.c diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index b100cf445880..ffce328309b8 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,44 +16,51 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # -ccflags-y := -I$(src) -I$(src)/linux-2.6 -I$(src)/quota -I$(src)/support ccflags-$(CONFIG_XFS_DEBUG) += -g -XFS_LINUX := linux-2.6 - obj-$(CONFIG_XFS_FS) += xfs.o -xfs-y += linux-2.6/xfs_trace.o - -xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \ - xfs_dquot.o \ - xfs_dquot_item.o \ - xfs_trans_dquot.o \ - xfs_qm_syscalls.o \ - xfs_qm_bhv.o \ - xfs_qm.o) -xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o - -ifeq ($(CONFIG_XFS_QUOTA),y) -xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o -endif - -xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o -xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o -xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o -xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o -xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o +# this one should be compiled first, as the tracing macros can easily blow up +xfs-y += xfs_trace.o +# highlevel code +xfs-y += xfs_aops.o \ + xfs_bit.o \ + xfs_buf.o \ + xfs_dfrag.o \ + xfs_discard.o \ + xfs_error.o \ + xfs_export.o \ + xfs_file.o \ + xfs_filestream.o \ + xfs_fsops.o \ + xfs_fs_subr.o \ + xfs_globals.o \ + xfs_iget.o \ + xfs_ioctl.o \ + xfs_iomap.o \ + xfs_iops.o \ + xfs_itable.o \ + xfs_message.o \ + xfs_mru_cache.o \ + xfs_super.o \ + xfs_sync.o \ + xfs_xattr.o \ + xfs_rename.o \ + xfs_rw.o \ + xfs_utils.o \ + xfs_vnodeops.o \ + kmem.o \ + uuid.o +# code shared with libxfs xfs-y += xfs_alloc.o \ xfs_alloc_btree.o \ xfs_attr.o \ xfs_attr_leaf.o \ - xfs_bit.o \ xfs_bmap.o \ xfs_bmap_btree.o \ xfs_btree.o \ - xfs_buf_item.o \ xfs_da_btree.o \ xfs_dir2.o \ xfs_dir2_block.o \ @@ -61,49 +68,37 @@ xfs-y += xfs_alloc.o \ xfs_dir2_leaf.o \ xfs_dir2_node.o \ xfs_dir2_sf.o \ - xfs_error.o \ - xfs_extfree_item.o \ - xfs_filestream.o \ - xfs_fsops.o \ xfs_ialloc.o \ xfs_ialloc_btree.o \ - xfs_iget.o \ xfs_inode.o \ - xfs_inode_item.o \ - xfs_iomap.o \ - xfs_itable.o \ - xfs_dfrag.o \ - xfs_log.o \ - xfs_log_cil.o \ xfs_log_recover.o \ xfs_mount.o \ - xfs_mru_cache.o \ - xfs_rename.o \ - xfs_trans.o \ + xfs_trans.o + +# low-level transaction/log code +xfs-y += xfs_log.o \ + xfs_log_cil.o \ + xfs_buf_item.o \ + xfs_extfree_item.o \ + xfs_inode_item.o \ xfs_trans_ail.o \ xfs_trans_buf.o \ xfs_trans_extfree.o \ xfs_trans_inode.o \ - xfs_utils.o \ - xfs_vnodeops.o \ - xfs_rw.o - -# Objects in linux/ -xfs-y += $(addprefix $(XFS_LINUX)/, \ - kmem.o \ - xfs_aops.o \ - xfs_buf.o \ - xfs_discard.o \ - xfs_export.o \ - xfs_file.o \ - xfs_fs_subr.o \ - xfs_globals.o \ - xfs_ioctl.o \ - xfs_iops.o \ - xfs_message.o \ - xfs_super.o \ - xfs_sync.o \ - xfs_xattr.o) -# Objects in support/ -xfs-y += support/uuid.o +# optional features +xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ + xfs_dquot_item.o \ + xfs_trans_dquot.o \ + xfs_qm_syscalls.o \ + xfs_qm_bhv.o \ + xfs_qm.o \ + xfs_quotaops.o +ifeq ($(CONFIG_XFS_QUOTA),y) +xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o +endif +xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o +xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o +xfs-$(CONFIG_PROC_FS) += xfs_stats.o +xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o +xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c new file mode 100644 index 000000000000..a907de565db3 --- /dev/null +++ b/fs/xfs/kmem.c @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include +#include "time.h" +#include "kmem.h" +#include "xfs_message.h" + +/* + * Greedy allocation. May fail and may return vmalloced memory. + * + * Must be freed using kmem_free_large. + */ +void * +kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) +{ + void *ptr; + size_t kmsize = maxsize; + + while (!(ptr = kmem_zalloc_large(kmsize))) { + if ((kmsize >>= 1) <= minsize) + kmsize = minsize; + } + if (ptr) + *size = kmsize; + return ptr; +} + +void * +kmem_alloc(size_t size, unsigned int __nocast flags) +{ + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; + + do { + ptr = kmalloc(size, lflags); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, lflags); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } while (1); +} + +void * +kmem_zalloc(size_t size, unsigned int __nocast flags) +{ + void *ptr; + + ptr = kmem_alloc(size, flags); + if (ptr) + memset((char *)ptr, 0, (int)size); + return ptr; +} + +void +kmem_free(const void *ptr) +{ + if (!is_vmalloc_addr(ptr)) { + kfree(ptr); + } else { + vfree(ptr); + } +} + +void * +kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, + unsigned int __nocast flags) +{ + void *new; + + new = kmem_alloc(newsize, flags); + if (ptr) { + if (new) + memcpy(new, ptr, + ((oldsize < newsize) ? oldsize : newsize)); + kmem_free(ptr); + } + return new; +} + +void * +kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +{ + int retries = 0; + gfp_t lflags = kmem_flags_convert(flags); + void *ptr; + + do { + ptr = kmem_cache_alloc(zone, lflags); + if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) + return ptr; + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, lflags); + congestion_wait(BLK_RW_ASYNC, HZ/50); + } while (1); +} + +void * +kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +{ + void *ptr; + + ptr = kmem_zone_alloc(zone, flags); + if (ptr) + memset((char *)ptr, 0, kmem_cache_size(zone)); + return ptr; +} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h new file mode 100644 index 000000000000..f7c8f7a9ea6d --- /dev/null +++ b/fs/xfs/kmem.h @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_KMEM_H__ +#define __XFS_SUPPORT_KMEM_H__ + +#include +#include +#include +#include + +/* + * General memory allocation interfaces + */ + +#define KM_SLEEP 0x0001u +#define KM_NOSLEEP 0x0002u +#define KM_NOFS 0x0004u +#define KM_MAYFAIL 0x0008u + +/* + * We use a special process flag to avoid recursive callbacks into + * the filesystem during transactions. We will also issue our own + * warnings, so we explicitly skip any generic ones (silly of us). + */ +static inline gfp_t +kmem_flags_convert(unsigned int __nocast flags) +{ + gfp_t lflags; + + BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); + + if (flags & KM_NOSLEEP) { + lflags = GFP_ATOMIC | __GFP_NOWARN; + } else { + lflags = GFP_KERNEL | __GFP_NOWARN; + if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) + lflags &= ~__GFP_FS; + } + return lflags; +} + +extern void *kmem_alloc(size_t, unsigned int __nocast); +extern void *kmem_zalloc(size_t, unsigned int __nocast); +extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); +extern void kmem_free(const void *); + +static inline void *kmem_zalloc_large(size_t size) +{ + void *ptr; + + ptr = vmalloc(size); + if (ptr) + memset(ptr, 0, size); + return ptr; +} +static inline void kmem_free_large(void *ptr) +{ + vfree(ptr); +} + +extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); + +/* + * Zone interfaces + */ + +#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN +#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT +#define KM_ZONE_SPREAD SLAB_MEM_SPREAD + +#define kmem_zone kmem_cache +#define kmem_zone_t struct kmem_cache + +static inline kmem_zone_t * +kmem_zone_init(int size, char *zone_name) +{ + return kmem_cache_create(zone_name, size, 0, 0, NULL); +} + +static inline kmem_zone_t * +kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, + void (*construct)(void *)) +{ + return kmem_cache_create(zone_name, size, 0, flags, construct); +} + +static inline void +kmem_zone_free(kmem_zone_t *zone, void *ptr) +{ + kmem_cache_free(zone, ptr); +} + +static inline void +kmem_zone_destroy(kmem_zone_t *zone) +{ + if (zone) + kmem_cache_destroy(zone); +} + +extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); + +static inline int +kmem_shake_allow(gfp_t gfp_mask) +{ + return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); +} + +#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c deleted file mode 100644 index a907de565db3..000000000000 --- a/fs/xfs/linux-2.6/kmem.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include -#include -#include -#include -#include -#include -#include "time.h" -#include "kmem.h" -#include "xfs_message.h" - -/* - * Greedy allocation. May fail and may return vmalloced memory. - * - * Must be freed using kmem_free_large. - */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = kmem_zalloc_large(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - -void * -kmem_alloc(size_t size, unsigned int __nocast flags) -{ - int retries = 0; - gfp_t lflags = kmem_flags_convert(flags); - void *ptr; - - do { - ptr = kmalloc(size, lflags); - if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) - return ptr; - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } while (1); -} - -void * -kmem_zalloc(size_t size, unsigned int __nocast flags) -{ - void *ptr; - - ptr = kmem_alloc(size, flags); - if (ptr) - memset((char *)ptr, 0, (int)size); - return ptr; -} - -void -kmem_free(const void *ptr) -{ - if (!is_vmalloc_addr(ptr)) { - kfree(ptr); - } else { - vfree(ptr); - } -} - -void * -kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) -{ - void *new; - - new = kmem_alloc(newsize, flags); - if (ptr) { - if (new) - memcpy(new, ptr, - ((oldsize < newsize) ? oldsize : newsize)); - kmem_free(ptr); - } - return new; -} - -void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) -{ - int retries = 0; - gfp_t lflags = kmem_flags_convert(flags); - void *ptr; - - do { - ptr = kmem_cache_alloc(zone, lflags); - if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP))) - return ptr; - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, lflags); - congestion_wait(BLK_RW_ASYNC, HZ/50); - } while (1); -} - -void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) -{ - void *ptr; - - ptr = kmem_zone_alloc(zone, flags); - if (ptr) - memset((char *)ptr, 0, kmem_cache_size(zone)); - return ptr; -} diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h deleted file mode 100644 index f7c8f7a9ea6d..000000000000 --- a/fs/xfs/linux-2.6/kmem.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_KMEM_H__ -#define __XFS_SUPPORT_KMEM_H__ - -#include -#include -#include -#include - -/* - * General memory allocation interfaces - */ - -#define KM_SLEEP 0x0001u -#define KM_NOSLEEP 0x0002u -#define KM_NOFS 0x0004u -#define KM_MAYFAIL 0x0008u - -/* - * We use a special process flag to avoid recursive callbacks into - * the filesystem during transactions. We will also issue our own - * warnings, so we explicitly skip any generic ones (silly of us). - */ -static inline gfp_t -kmem_flags_convert(unsigned int __nocast flags) -{ - gfp_t lflags; - - BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); - - if (flags & KM_NOSLEEP) { - lflags = GFP_ATOMIC | __GFP_NOWARN; - } else { - lflags = GFP_KERNEL | __GFP_NOWARN; - if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) - lflags &= ~__GFP_FS; - } - return lflags; -} - -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); -extern void kmem_free(const void *); - -static inline void *kmem_zalloc_large(size_t size) -{ - void *ptr; - - ptr = vmalloc(size); - if (ptr) - memset(ptr, 0, size); - return ptr; -} -static inline void kmem_free_large(void *ptr) -{ - vfree(ptr); -} - -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - -/* - * Zone interfaces - */ - -#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN -#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT -#define KM_ZONE_SPREAD SLAB_MEM_SPREAD - -#define kmem_zone kmem_cache -#define kmem_zone_t struct kmem_cache - -static inline kmem_zone_t * -kmem_zone_init(int size, char *zone_name) -{ - return kmem_cache_create(zone_name, size, 0, 0, NULL); -} - -static inline kmem_zone_t * -kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, - void (*construct)(void *)) -{ - return kmem_cache_create(zone_name, size, 0, flags, construct); -} - -static inline void -kmem_zone_free(kmem_zone_t *zone, void *ptr) -{ - kmem_cache_free(zone, ptr); -} - -static inline void -kmem_zone_destroy(kmem_zone_t *zone) -{ - if (zone) - kmem_cache_destroy(zone); -} - -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); - -static inline int -kmem_shake_allow(gfp_t gfp_mask) -{ - return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)); -} - -#endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h deleted file mode 100644 index ff6a19873e5c..000000000000 --- a/fs/xfs/linux-2.6/mrlock.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_MRLOCK_H__ -#define __XFS_SUPPORT_MRLOCK_H__ - -#include - -typedef struct { - struct rw_semaphore mr_lock; -#ifdef DEBUG - int mr_writer; -#endif -} mrlock_t; - -#ifdef DEBUG -#define mrinit(mrp, name) \ - do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) -#else -#define mrinit(mrp, name) \ - do { init_rwsem(&(mrp)->mr_lock); } while (0) -#endif - -#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) -#define mrfree(mrp) do { } while (0) - -static inline void mraccess_nested(mrlock_t *mrp, int subclass) -{ - down_read_nested(&mrp->mr_lock, subclass); -} - -static inline void mrupdate_nested(mrlock_t *mrp, int subclass) -{ - down_write_nested(&mrp->mr_lock, subclass); -#ifdef DEBUG - mrp->mr_writer = 1; -#endif -} - -static inline int mrtryaccess(mrlock_t *mrp) -{ - return down_read_trylock(&mrp->mr_lock); -} - -static inline int mrtryupdate(mrlock_t *mrp) -{ - if (!down_write_trylock(&mrp->mr_lock)) - return 0; -#ifdef DEBUG - mrp->mr_writer = 1; -#endif - return 1; -} - -static inline void mrunlock_excl(mrlock_t *mrp) -{ -#ifdef DEBUG - mrp->mr_writer = 0; -#endif - up_write(&mrp->mr_lock); -} - -static inline void mrunlock_shared(mrlock_t *mrp) -{ - up_read(&mrp->mr_lock); -} - -static inline void mrdemote(mrlock_t *mrp) -{ -#ifdef DEBUG - mrp->mr_writer = 0; -#endif - downgrade_write(&mrp->mr_lock); -} - -#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h deleted file mode 100644 index 387e695a184c..000000000000 --- a/fs/xfs/linux-2.6/time.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_TIME_H__ -#define __XFS_SUPPORT_TIME_H__ - -#include -#include - -typedef struct timespec timespec_t; - -static inline void delay(long ticks) -{ - schedule_timeout_uninterruptible(ticks); -} - -static inline void nanotime(struct timespec *tvp) -{ - *tvp = CURRENT_TIME; -} - -#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c deleted file mode 100644 index b6c4b3795c4a..000000000000 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_vnodeops.h" -#include "xfs_trace.h" -#include -#include -#include - - -/* - * Locking scheme: - * - all ACL updates are protected by inode->i_mutex, which is taken before - * calling into this file. - */ - -STATIC struct posix_acl * -xfs_acl_from_disk(struct xfs_acl *aclp) -{ - struct posix_acl_entry *acl_e; - struct posix_acl *acl; - struct xfs_acl_entry *ace; - int count, i; - - count = be32_to_cpu(aclp->acl_cnt); - - acl = posix_acl_alloc(count, GFP_KERNEL); - if (!acl) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < count; i++) { - acl_e = &acl->a_entries[i]; - ace = &aclp->acl_entry[i]; - - /* - * The tag is 32 bits on disk and 16 bits in core. - * - * Because every access to it goes through the core - * format first this is not a problem. - */ - acl_e->e_tag = be32_to_cpu(ace->ae_tag); - acl_e->e_perm = be16_to_cpu(ace->ae_perm); - - switch (acl_e->e_tag) { - case ACL_USER: - case ACL_GROUP: - acl_e->e_id = be32_to_cpu(ace->ae_id); - break; - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - acl_e->e_id = ACL_UNDEFINED_ID; - break; - default: - goto fail; - } - } - return acl; - -fail: - posix_acl_release(acl); - return ERR_PTR(-EINVAL); -} - -STATIC void -xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) -{ - const struct posix_acl_entry *acl_e; - struct xfs_acl_entry *ace; - int i; - - aclp->acl_cnt = cpu_to_be32(acl->a_count); - for (i = 0; i < acl->a_count; i++) { - ace = &aclp->acl_entry[i]; - acl_e = &acl->a_entries[i]; - - ace->ae_tag = cpu_to_be32(acl_e->e_tag); - ace->ae_id = cpu_to_be32(acl_e->e_id); - ace->ae_perm = cpu_to_be16(acl_e->e_perm); - } -} - -struct posix_acl * -xfs_get_acl(struct inode *inode, int type) -{ - struct xfs_inode *ip = XFS_I(inode); - struct posix_acl *acl; - struct xfs_acl *xfs_acl; - int len = sizeof(struct xfs_acl); - unsigned char *ea_name; - int error; - - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - - trace_xfs_get_acl(ip); - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - break; - case ACL_TYPE_DEFAULT: - ea_name = SGI_ACL_DEFAULT; - break; - default: - BUG(); - } - - /* - * If we have a cached ACLs value just return it, not need to - * go out to the disk. - */ - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return ERR_PTR(-ENOMEM); - - error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, - &len, ATTR_ROOT); - if (error) { - /* - * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as ACL_NOT_CACHED. - */ - if (error == -ENOATTR) { - acl = NULL; - goto out_update_cache; - } - goto out; - } - - acl = xfs_acl_from_disk(xfs_acl); - if (IS_ERR(acl)) - goto out; - - out_update_cache: - set_cached_acl(inode, type, acl); - out: - kfree(xfs_acl); - return acl; -} - -STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) -{ - struct xfs_inode *ip = XFS_I(inode); - unsigned char *ea_name; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - switch (type) { - case ACL_TYPE_ACCESS: - ea_name = SGI_ACL_FILE; - break; - case ACL_TYPE_DEFAULT: - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - ea_name = SGI_ACL_DEFAULT; - break; - default: - return -EINVAL; - } - - if (acl) { - struct xfs_acl *xfs_acl; - int len; - - xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); - if (!xfs_acl) - return -ENOMEM; - - xfs_acl_to_disk(xfs_acl, acl); - len = sizeof(struct xfs_acl) - - (sizeof(struct xfs_acl_entry) * - (XFS_ACL_MAX_ENTRIES - acl->a_count)); - - error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, - len, ATTR_ROOT); - - kfree(xfs_acl); - } else { - /* - * A NULL ACL argument means we want to remove the ACL. - */ - error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (error == -ENOATTR) - error = 0; - } - - if (!error) - set_cached_acl(inode, type, acl); - return error; -} - -static int -xfs_set_mode(struct inode *inode, umode_t mode) -{ - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; - - iattr.ia_valid = ATTR_MODE | ATTR_CTIME; - iattr.ia_mode = mode; - iattr.ia_ctime = current_fs_time(inode->i_sb); - - error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } - - return error; -} - -static int -xfs_acl_exists(struct inode *inode, unsigned char *name) -{ - int len = sizeof(struct xfs_acl); - - return (xfs_attr_get(XFS_I(inode), name, NULL, &len, - ATTR_ROOT|ATTR_KERNOVAL) == 0); -} - -int -posix_acl_access_exists(struct inode *inode) -{ - return xfs_acl_exists(inode, SGI_ACL_FILE); -} - -int -posix_acl_default_exists(struct inode *inode) -{ - if (!S_ISDIR(inode->i_mode)) - return 0; - return xfs_acl_exists(inode, SGI_ACL_DEFAULT); -} - -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ -int -xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) -{ - umode_t mode = inode->i_mode; - int error = 0, inherit = 0; - - if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); - if (error) - goto out; - } - - error = posix_acl_create(&acl, GFP_KERNEL, &mode); - if (error < 0) - return error; - - /* - * If posix_acl_create returns a positive value we need to - * inherit a permission that can't be represented using the Unix - * mode bits and we actually need to set an ACL. - */ - if (error > 0) - inherit = 1; - - error = xfs_set_mode(inode, mode); - if (error) - goto out; - - if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - -out: - posix_acl_release(acl); - return error; -} - -int -xfs_acl_chmod(struct inode *inode) -{ - struct posix_acl *acl; - int error; - - if (S_ISLNK(inode->i_mode)) - return -EOPNOTSUPP; - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl) || !acl) - return PTR_ERR(acl); - - error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); - if (error) - return error; - - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(acl); - return error; -} - -static int -xfs_xattr_acl_get(struct dentry *dentry, const char *name, - void *value, size_t size, int type) -{ - struct posix_acl *acl; - int error; - - acl = xfs_get_acl(dentry->d_inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl == NULL) - return -ENODATA; - - error = posix_acl_to_xattr(acl, value, size); - posix_acl_release(acl); - - return error; -} - -static int -xfs_xattr_acl_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags, int type) -{ - struct inode *inode = dentry->d_inode; - struct posix_acl *acl = NULL; - int error = 0; - - if (flags & XATTR_CREATE) - return -EINVAL; - if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) - return value ? -EACCES : 0; - if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - - if (!value) - goto set_acl; - - acl = posix_acl_from_xattr(value, size); - if (!acl) { - /* - * acl_set_file(3) may request that we set default ACLs with - * zero length -- defend (gracefully) against that here. - */ - goto out; - } - if (IS_ERR(acl)) { - error = PTR_ERR(acl); - goto out; - } - - error = posix_acl_valid(acl); - if (error) - goto out_release; - - error = -EINVAL; - if (acl->a_count > XFS_ACL_MAX_ENTRIES) - goto out_release; - - if (type == ACL_TYPE_ACCESS) { - umode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - - if (error <= 0) { - posix_acl_release(acl); - acl = NULL; - - if (error < 0) - return error; - } - - error = xfs_set_mode(inode, mode); - if (error) - goto out_release; - } - - set_acl: - error = xfs_set_acl(inode, type, acl); - out_release: - posix_acl_release(acl); - out: - return error; -} - -const struct xattr_handler xfs_xattr_acl_access_handler = { - .prefix = POSIX_ACL_XATTR_ACCESS, - .flags = ACL_TYPE_ACCESS, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; - -const struct xattr_handler xfs_xattr_acl_default_handler = { - .prefix = POSIX_ACL_XATTR_DEFAULT, - .flags = ACL_TYPE_DEFAULT, - .get = xfs_xattr_acl_get, - .set = xfs_xattr_acl_set, -}; diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c deleted file mode 100644 index 63e971e2b837..000000000000 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ /dev/null @@ -1,1499 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_trans.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_rw.h" -#include "xfs_iomap.h" -#include "xfs_vnodeops.h" -#include "xfs_trace.h" -#include "xfs_bmap.h" -#include -#include -#include -#include - - -/* - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t xfs_ioend_wq[NVSYNC]; - -void __init -xfs_ioend_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&xfs_ioend_wq[i]); -} - -void -xfs_ioend_wait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = to_ioend_wq(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -STATIC void -xfs_ioend_wake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(to_ioend_wq(ip)); -} - -void -xfs_count_page_state( - struct page *page, - int *delalloc, - int *unwritten) -{ - struct buffer_head *bh, *head; - - *delalloc = *unwritten = 0; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - (*unwritten) = 1; - else if (buffer_delay(bh)) - (*delalloc) = 1; - } while ((bh = bh->b_this_page) != head); -} - -STATIC struct block_device * -xfs_find_bdev_for_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - if (XFS_IS_REALTIME_INODE(ip)) - return mp->m_rtdev_targp->bt_bdev; - else - return mp->m_ddev_targp->bt_bdev; -} - -/* - * We're now finished for good with this ioend structure. - * Update the page state via the associated buffer_heads, - * release holds on the inode and bio, and finally free - * up memory. Do not use the ioend after this. - */ -STATIC void -xfs_destroy_ioend( - xfs_ioend_t *ioend) -{ - struct buffer_head *bh, *next; - struct xfs_inode *ip = XFS_I(ioend->io_inode); - - for (bh = ioend->io_buffer_head; bh; bh = next) { - next = bh->b_private; - bh->b_end_io(bh, !ioend->io_error); - } - - /* - * Volume managers supporting multiple paths can send back ENODEV - * when the final path disappears. In this case continuing to fill - * the page cache with dirty data which cannot be written out is - * evil, so prevent that. - */ - if (unlikely(ioend->io_error == -ENODEV)) { - xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, - __FILE__, __LINE__); - } - - xfs_ioend_wake(ip); - mempool_free(ioend, xfs_ioend_pool); -} - -/* - * If the end of the current ioend is beyond the current EOF, - * return the new EOF value, otherwise zero. - */ -STATIC xfs_fsize_t -xfs_ioend_new_eof( - xfs_ioend_t *ioend) -{ - xfs_inode_t *ip = XFS_I(ioend->io_inode); - xfs_fsize_t isize; - xfs_fsize_t bsize; - - bsize = ioend->io_offset + ioend->io_size; - isize = MAX(ip->i_size, ip->i_new_size); - isize = MIN(isize, bsize); - return isize > ip->i_d.di_size ? isize : 0; -} - -/* - * Update on-disk file size now that data has been written to disk. The - * current in-memory file size is i_size. If a write is beyond eof i_new_size - * will be the intended file size until i_size is updated. If this write does - * not extend all the way to the valid file size then restrict this update to - * the end of the write. - * - * This function does not block as blocking on the inode lock in IO completion - * can lead to IO completion order dependency deadlocks.. If it can't get the - * inode ilock it will return EAGAIN. Callers must handle this. - */ -STATIC int -xfs_setfilesize( - xfs_ioend_t *ioend) -{ - xfs_inode_t *ip = XFS_I(ioend->io_inode); - xfs_fsize_t isize; - - if (unlikely(ioend->io_error)) - return 0; - - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) - return EAGAIN; - - isize = xfs_ioend_new_eof(ioend); - if (isize) { - trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); - ip->i_d.di_size = isize; - xfs_mark_inode_dirty(ip); - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - -/* - * Schedule IO completion handling on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) { - if (ioend->io_type == IO_UNWRITTEN) - queue_work(xfsconvertd_workqueue, &ioend->io_work); - else - queue_work(xfsdatad_workqueue, &ioend->io_work); - } -} - -/* - * IO write completion. - */ -STATIC void -xfs_end_io( - struct work_struct *work) -{ - xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); - struct xfs_inode *ip = XFS_I(ioend->io_inode); - int error = 0; - - /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - */ - if (ioend->io_type == IO_UNWRITTEN && - likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { - - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - if (error) - ioend->io_error = error; - } - - /* - * We might have to update the on-disk file size after extending - * writes. - */ - error = xfs_setfilesize(ioend); - ASSERT(!error || error == EAGAIN); - - /* - * If we didn't complete processing of the ioend, requeue it to the - * tail of the workqueue for another attempt later. Otherwise destroy - * it. - */ - if (error == EAGAIN) { - atomic_inc(&ioend->io_remaining); - xfs_finish_ioend(ioend); - /* ensure we don't spin on blocked ioends */ - delay(1); - } else { - if (ioend->io_iocb) - aio_complete(ioend->io_iocb, ioend->io_result, 0); - xfs_destroy_ioend(ioend); - } -} - -/* - * Call IO completion handling in caller context on the final put of an ioend. - */ -STATIC void -xfs_finish_ioend_sync( - struct xfs_ioend *ioend) -{ - if (atomic_dec_and_test(&ioend->io_remaining)) - xfs_end_io(&ioend->io_work); -} - -/* - * Allocate and initialise an IO completion structure. - * We need to track unwritten extent write completion here initially. - * We'll need to extend this for updating the ondisk inode size later - * (vs. incore size). - */ -STATIC xfs_ioend_t * -xfs_alloc_ioend( - struct inode *inode, - unsigned int type) -{ - xfs_ioend_t *ioend; - - ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); - - /* - * Set the count to 1 initially, which will prevent an I/O - * completion callback from happening before we have started - * all the I/O from calling the completion routine too early. - */ - atomic_set(&ioend->io_remaining, 1); - ioend->io_error = 0; - ioend->io_list = NULL; - ioend->io_type = type; - ioend->io_inode = inode; - ioend->io_buffer_head = NULL; - ioend->io_buffer_tail = NULL; - atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); - ioend->io_offset = 0; - ioend->io_size = 0; - ioend->io_iocb = NULL; - ioend->io_result = 0; - - INIT_WORK(&ioend->io_work, xfs_end_io); - return ioend; -} - -STATIC int -xfs_map_blocks( - struct inode *inode, - loff_t offset, - struct xfs_bmbt_irec *imap, - int type, - int nonblocking) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t count = 1 << inode->i_blkbits; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int bmapi_flags = XFS_BMAPI_ENTIRE; - int nimaps = 1; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (type == IO_UNWRITTEN) - bmapi_flags |= XFS_BMAPI_IGSTATE; - - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { - if (nonblocking) - return -XFS_ERROR(EAGAIN); - xfs_ilock(ip, XFS_ILOCK_SHARED); - } - - ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || - (ip->i_df.if_flags & XFS_IFEXTENTS)); - ASSERT(offset <= mp->m_maxioffset); - - if (offset + count > mp->m_maxioffset) - count = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - bmapi_flags, NULL, 0, imap, &nimaps, NULL); - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (error) - return -XFS_ERROR(error); - - if (type == IO_DELALLOC && - (!nimaps || isnullstartblock(imap->br_startblock))) { - error = xfs_iomap_write_allocate(ip, offset, count, imap); - if (!error) - trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); - return -XFS_ERROR(error); - } - -#ifdef DEBUG - if (type == IO_UNWRITTEN) { - ASSERT(nimaps); - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - } -#endif - if (nimaps) - trace_xfs_map_blocks_found(ip, offset, count, type, imap); - return 0; -} - -STATIC int -xfs_imap_valid( - struct inode *inode, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - offset >>= inode->i_blkbits; - - return offset >= imap->br_startoff && - offset < imap->br_startoff + imap->br_blockcount; -} - -/* - * BIO completion handler for buffered IO. - */ -STATIC void -xfs_end_bio( - struct bio *bio, - int error) -{ - xfs_ioend_t *ioend = bio->bi_private; - - ASSERT(atomic_read(&bio->bi_cnt) >= 1); - ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; - - /* Toss bio and pass work off to an xfsdatad thread */ - bio->bi_private = NULL; - bio->bi_end_io = NULL; - bio_put(bio); - - xfs_finish_ioend(ioend); -} - -STATIC void -xfs_submit_ioend_bio( - struct writeback_control *wbc, - xfs_ioend_t *ioend, - struct bio *bio) -{ - atomic_inc(&ioend->io_remaining); - bio->bi_private = ioend; - bio->bi_end_io = xfs_end_bio; - - /* - * If the I/O is beyond EOF we mark the inode dirty immediately - * but don't update the inode size until I/O completion. - */ - if (xfs_ioend_new_eof(ioend)) - xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); - - submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); -} - -STATIC struct bio * -xfs_alloc_ioend_bio( - struct buffer_head *bh) -{ - int nvecs = bio_get_nr_vecs(bh->b_bdev); - struct bio *bio = bio_alloc(GFP_NOIO, nvecs); - - ASSERT(bio->bi_private == NULL); - bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); - bio->bi_bdev = bh->b_bdev; - return bio; -} - -STATIC void -xfs_start_buffer_writeback( - struct buffer_head *bh) -{ - ASSERT(buffer_mapped(bh)); - ASSERT(buffer_locked(bh)); - ASSERT(!buffer_delay(bh)); - ASSERT(!buffer_unwritten(bh)); - - mark_buffer_async_write(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); -} - -STATIC void -xfs_start_page_writeback( - struct page *page, - int clear_dirty, - int buffers) -{ - ASSERT(PageLocked(page)); - ASSERT(!PageWriteback(page)); - if (clear_dirty) - clear_page_dirty_for_io(page); - set_page_writeback(page); - unlock_page(page); - /* If no buffers on the page are to be written, finish it here */ - if (!buffers) - end_page_writeback(page); -} - -static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) -{ - return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); -} - -/* - * Submit all of the bios for all of the ioends we have saved up, covering the - * initial writepage page and also any probed pages. - * - * Because we may have multiple ioends spanning a page, we need to start - * writeback on all the buffers before we submit them for I/O. If we mark the - * buffers as we got, then we can end up with a page that only has buffers - * marked async write and I/O complete on can occur before we mark the other - * buffers async write. - * - * The end result of this is that we trip a bug in end_page_writeback() because - * we call it twice for the one page as the code in end_buffer_async_write() - * assumes that all buffers on the page are started at the same time. - * - * The fix is two passes across the ioend list - one to start writeback on the - * buffer_heads, and then submit them for I/O on the second pass. - */ -STATIC void -xfs_submit_ioend( - struct writeback_control *wbc, - xfs_ioend_t *ioend) -{ - xfs_ioend_t *head = ioend; - xfs_ioend_t *next; - struct buffer_head *bh; - struct bio *bio; - sector_t lastblock = 0; - - /* Pass 1 - start writeback */ - do { - next = ioend->io_list; - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) - xfs_start_buffer_writeback(bh); - } while ((ioend = next) != NULL); - - /* Pass 2 - submit I/O */ - ioend = head; - do { - next = ioend->io_list; - bio = NULL; - - for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { - - if (!bio) { - retry: - bio = xfs_alloc_ioend_bio(bh); - } else if (bh->b_blocknr != lastblock + 1) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; - } - - if (bio_add_buffer(bio, bh) != bh->b_size) { - xfs_submit_ioend_bio(wbc, ioend, bio); - goto retry; - } - - lastblock = bh->b_blocknr; - } - if (bio) - xfs_submit_ioend_bio(wbc, ioend, bio); - xfs_finish_ioend(ioend); - } while ((ioend = next) != NULL); -} - -/* - * Cancel submission of all buffer_heads so far in this endio. - * Toss the endio too. Only ever called for the initial page - * in a writepage request, so only ever one page. - */ -STATIC void -xfs_cancel_ioend( - xfs_ioend_t *ioend) -{ - xfs_ioend_t *next; - struct buffer_head *bh, *next_bh; - - do { - next = ioend->io_list; - bh = ioend->io_buffer_head; - do { - next_bh = bh->b_private; - clear_buffer_async_write(bh); - unlock_buffer(bh); - } while ((bh = next_bh) != NULL); - - xfs_ioend_wake(XFS_I(ioend->io_inode)); - mempool_free(ioend, xfs_ioend_pool); - } while ((ioend = next) != NULL); -} - -/* - * Test to see if we've been building up a completion structure for - * earlier buffers -- if so, we try to append to this ioend if we - * can, otherwise we finish off any current ioend and start another. - * Return true if we've finished the given ioend. - */ -STATIC void -xfs_add_to_ioend( - struct inode *inode, - struct buffer_head *bh, - xfs_off_t offset, - unsigned int type, - xfs_ioend_t **result, - int need_ioend) -{ - xfs_ioend_t *ioend = *result; - - if (!ioend || need_ioend || type != ioend->io_type) { - xfs_ioend_t *previous = *result; - - ioend = xfs_alloc_ioend(inode, type); - ioend->io_offset = offset; - ioend->io_buffer_head = bh; - ioend->io_buffer_tail = bh; - if (previous) - previous->io_list = ioend; - *result = ioend; - } else { - ioend->io_buffer_tail->b_private = bh; - ioend->io_buffer_tail = bh; - } - - bh->b_private = NULL; - ioend->io_size += bh->b_size; -} - -STATIC void -xfs_map_buffer( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - sector_t bn; - struct xfs_mount *m = XFS_I(inode)->i_mount; - xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); - xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); - - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + - ((offset - iomap_offset) >> inode->i_blkbits); - - ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); - - bh->b_blocknr = bn; - set_buffer_mapped(bh); -} - -STATIC void -xfs_map_at_offset( - struct inode *inode, - struct buffer_head *bh, - struct xfs_bmbt_irec *imap, - xfs_off_t offset) -{ - ASSERT(imap->br_startblock != HOLESTARTBLOCK); - ASSERT(imap->br_startblock != DELAYSTARTBLOCK); - - xfs_map_buffer(inode, bh, imap, offset); - set_buffer_mapped(bh); - clear_buffer_delay(bh); - clear_buffer_unwritten(bh); -} - -/* - * Test if a given page is suitable for writing as part of an unwritten - * or delayed allocate extent. - */ -STATIC int -xfs_is_delayed_page( - struct page *page, - unsigned int type) -{ - if (PageWriteback(page)) - return 0; - - if (page->mapping && page_has_buffers(page)) { - struct buffer_head *bh, *head; - int acceptable = 0; - - bh = head = page_buffers(page); - do { - if (buffer_unwritten(bh)) - acceptable = (type == IO_UNWRITTEN); - else if (buffer_delay(bh)) - acceptable = (type == IO_DELALLOC); - else if (buffer_dirty(bh) && buffer_mapped(bh)) - acceptable = (type == IO_OVERWRITE); - else - break; - } while ((bh = bh->b_this_page) != head); - - if (acceptable) - return 1; - } - - return 0; -} - -/* - * Allocate & map buffers for page given the extent map. Write it out. - * except for the original page of a writepage, this is called on - * delalloc/unwritten pages only, for the original page it is possible - * that the page has no mapping at all. - */ -STATIC int -xfs_convert_page( - struct inode *inode, - struct page *page, - loff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, - struct writeback_control *wbc) -{ - struct buffer_head *bh, *head; - xfs_off_t end_offset; - unsigned long p_offset; - unsigned int type; - int len, page_dirty; - int count = 0, done = 0, uptodate = 1; - xfs_off_t offset = page_offset(page); - - if (page->index != tindex) - goto fail; - if (!trylock_page(page)) - goto fail; - if (PageWriteback(page)) - goto fail_unlock_page; - if (page->mapping != inode->i_mapping) - goto fail_unlock_page; - if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) - goto fail_unlock_page; - - /* - * page_dirty is initially a count of buffers on the page before - * EOF and is decremented as we move each into a cleanable state. - * - * Derivation: - * - * End offset is the highest offset that this page should represent. - * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) - * will evaluate non-zero and be less than PAGE_CACHE_SIZE and - * hence give us the correct page_dirty count. On any other page, - * it will be zero and in that case we need page_dirty to be the - * count of buffers on the page. - */ - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - i_size_read(inode)); - - len = 1 << inode->i_blkbits; - p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), - PAGE_CACHE_SIZE); - p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; - page_dirty = p_offset / len; - - bh = head = page_buffers(page); - do { - if (offset >= end_offset) - break; - if (!buffer_uptodate(bh)) - uptodate = 0; - if (!(PageUptodate(page) || buffer_uptodate(bh))) { - done = 1; - continue; - } - - if (buffer_unwritten(bh) || buffer_delay(bh) || - buffer_mapped(bh)) { - if (buffer_unwritten(bh)) - type = IO_UNWRITTEN; - else if (buffer_delay(bh)) - type = IO_DELALLOC; - else - type = IO_OVERWRITE; - - if (!xfs_imap_valid(inode, imap, offset)) { - done = 1; - continue; - } - - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, - ioendp, done); - - page_dirty--; - count++; - } else { - done = 1; - } - } while (offset += len, (bh = bh->b_this_page) != head); - - if (uptodate && bh == head) - SetPageUptodate(page); - - if (count) { - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) - done = 1; - } - xfs_start_page_writeback(page, !page_dirty, count); - - return done; - fail_unlock_page: - unlock_page(page); - fail: - return 1; -} - -/* - * Convert & write out a cluster of pages in the same extent as defined - * by mp and following the start page. - */ -STATIC void -xfs_cluster_write( - struct inode *inode, - pgoff_t tindex, - struct xfs_bmbt_irec *imap, - xfs_ioend_t **ioendp, - struct writeback_control *wbc, - pgoff_t tlast) -{ - struct pagevec pvec; - int done = 0, i; - - pagevec_init(&pvec, 0); - while (!done && tindex <= tlast) { - unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); - - if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) - break; - - for (i = 0; i < pagevec_count(&pvec); i++) { - done = xfs_convert_page(inode, pvec.pages[i], tindex++, - imap, ioendp, wbc); - if (done) - break; - } - - pagevec_release(&pvec); - cond_resched(); - } -} - -STATIC void -xfs_vm_invalidatepage( - struct page *page, - unsigned long offset) -{ - trace_xfs_invalidatepage(page->mapping->host, page, offset); - block_invalidatepage(page, offset); -} - -/* - * If the page has delalloc buffers on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read - * is done on that same region - the delalloc extent is returned when none is - * supposed to be there. - * - * We prevent this by truncating away the delalloc regions on the page before - * invalidating it. Because they are delalloc, we can do this without needing a - * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this - * truncation without a transaction as there is no space left for block - * reservation (typically why we see a ENOSPC in writeback). - * - * This is not a performance critical path, so for now just do the punching a - * buffer head at a time. - */ -STATIC void -xfs_aops_discard_page( - struct page *page) -{ - struct inode *inode = page->mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct buffer_head *bh, *head; - loff_t offset = page_offset(page); - - if (!xfs_is_delayed_page(page, IO_DELALLOC)) - goto out_invalidate; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - goto out_invalidate; - - xfs_alert(ip->i_mount, - "page discard on page %p, inode 0x%llx, offset %llu.", - page, ip->i_ino, offset); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - bh = head = page_buffers(page); - do { - int error; - xfs_fileoff_t start_fsb; - - if (!buffer_delay(bh)) - goto next_buffer; - - start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "page discard unable to remove delalloc mapping."); - } - break; - } -next_buffer: - offset += 1 << inode->i_blkbits; - - } while ((bh = bh->b_this_page) != head); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_invalidate: - xfs_vm_invalidatepage(page, 0); - return; -} - -/* - * Write out a dirty page. - * - * For delalloc space on the page we need to allocate space and flush it. - * For unwritten space on the page we need to start the conversion to - * regular allocated space. - * For any other dirty buffer heads on the page we should flush them. - */ -STATIC int -xfs_vm_writepage( - struct page *page, - struct writeback_control *wbc) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *bh, *head; - struct xfs_bmbt_irec imap; - xfs_ioend_t *ioend = NULL, *iohead = NULL; - loff_t offset; - unsigned int type; - __uint64_t end_offset; - pgoff_t end_index, last_index; - ssize_t len; - int err, imap_valid = 0, uptodate = 1; - int count = 0; - int nonblocking = 0; - - trace_xfs_writepage(inode, page, 0); - - ASSERT(page_has_buffers(page)); - - /* - * Refuse to write the page out if we are called from reclaim context. - * - * This avoids stack overflows when called from deeply used stacks in - * random callers for direct reclaim or memcg reclaim. We explicitly - * allow reclaim from kswapd as the stack usage there is relatively low. - * - * This should really be done by the core VM, but until that happens - * filesystems like XFS, btrfs and ext4 have to take care of this - * by themselves. - */ - if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) - goto redirty; - - /* - * Given that we do not allow direct reclaim to call us, we should - * never be called while in a filesystem transaction. - */ - if (WARN_ON(current->flags & PF_FSTRANS)) - goto redirty; - - /* Is this page beyond the end of the file? */ - offset = i_size_read(inode); - end_index = offset >> PAGE_CACHE_SHIFT; - last_index = (offset - 1) >> PAGE_CACHE_SHIFT; - if (page->index >= end_index) { - if ((page->index >= end_index + 1) || - !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { - unlock_page(page); - return 0; - } - } - - end_offset = min_t(unsigned long long, - (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, - offset); - len = 1 << inode->i_blkbits; - - bh = head = page_buffers(page); - offset = page_offset(page); - type = IO_OVERWRITE; - - if (wbc->sync_mode == WB_SYNC_NONE) - nonblocking = 1; - - do { - int new_ioend = 0; - - if (offset >= end_offset) - break; - if (!buffer_uptodate(bh)) - uptodate = 0; - - /* - * set_page_dirty dirties all buffers in a page, independent - * of their state. The dirty state however is entirely - * meaningless for holes (!mapped && uptodate), so skip - * buffers covering holes here. - */ - if (!buffer_mapped(bh) && buffer_uptodate(bh)) { - imap_valid = 0; - continue; - } - - if (buffer_unwritten(bh)) { - if (type != IO_UNWRITTEN) { - type = IO_UNWRITTEN; - imap_valid = 0; - } - } else if (buffer_delay(bh)) { - if (type != IO_DELALLOC) { - type = IO_DELALLOC; - imap_valid = 0; - } - } else if (buffer_uptodate(bh)) { - if (type != IO_OVERWRITE) { - type = IO_OVERWRITE; - imap_valid = 0; - } - } else { - if (PageUptodate(page)) { - ASSERT(buffer_mapped(bh)); - imap_valid = 0; - } - continue; - } - - if (imap_valid) - imap_valid = xfs_imap_valid(inode, &imap, offset); - if (!imap_valid) { - /* - * If we didn't have a valid mapping then we need to - * put the new mapping into a separate ioend structure. - * This ensures non-contiguous extents always have - * separate ioends, which is particularly important - * for unwritten extent conversion at I/O completion - * time. - */ - new_ioend = 1; - err = xfs_map_blocks(inode, offset, &imap, type, - nonblocking); - if (err) - goto error; - imap_valid = xfs_imap_valid(inode, &imap, offset); - } - if (imap_valid) { - lock_buffer(bh); - if (type != IO_OVERWRITE) - xfs_map_at_offset(inode, bh, &imap, offset); - xfs_add_to_ioend(inode, bh, offset, type, &ioend, - new_ioend); - count++; - } - - if (!iohead) - iohead = ioend; - - } while (offset += len, ((bh = bh->b_this_page) != head)); - - if (uptodate && bh == head) - SetPageUptodate(page); - - xfs_start_page_writeback(page, 1, count); - - if (ioend && imap_valid) { - xfs_off_t end_index; - - end_index = imap.br_startoff + imap.br_blockcount; - - /* to bytes */ - end_index <<= inode->i_blkbits; - - /* to pages */ - end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; - - /* check against file size */ - if (end_index > last_index) - end_index = last_index; - - xfs_cluster_write(inode, page->index + 1, &imap, &ioend, - wbc, end_index); - } - - if (iohead) - xfs_submit_ioend(wbc, iohead); - - return 0; - -error: - if (iohead) - xfs_cancel_ioend(iohead); - - if (err == -EAGAIN) - goto redirty; - - xfs_aops_discard_page(page); - ClearPageUptodate(page); - unlock_page(page); - return err; - -redirty: - redirty_page_for_writepage(wbc, page); - unlock_page(page); - return 0; -} - -STATIC int -xfs_vm_writepages( - struct address_space *mapping, - struct writeback_control *wbc) -{ - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); - return generic_writepages(mapping, wbc); -} - -/* - * Called to move a page into cleanable state - and from there - * to be released. The page should already be clean. We always - * have buffer heads in this call. - * - * Returns 1 if the page is ok to release, 0 otherwise. - */ -STATIC int -xfs_vm_releasepage( - struct page *page, - gfp_t gfp_mask) -{ - int delalloc, unwritten; - - trace_xfs_releasepage(page->mapping->host, page, 0); - - xfs_count_page_state(page, &delalloc, &unwritten); - - if (WARN_ON(delalloc)) - return 0; - if (WARN_ON(unwritten)) - return 0; - - return try_to_free_buffers(page); -} - -STATIC int -__xfs_get_blocks( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create, - int direct) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t offset_fsb, end_fsb; - int error = 0; - int lockmode = 0; - struct xfs_bmbt_irec imap; - int nimaps = 1; - xfs_off_t offset; - ssize_t size; - int new = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - offset = (xfs_off_t)iblock << inode->i_blkbits; - ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); - size = bh_result->b_size; - - if (!create && direct && offset >= i_size_read(inode)) - return 0; - - if (create) { - lockmode = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockmode); - } else { - lockmode = xfs_ilock_map_shared(ip); - } - - ASSERT(offset <= mp->m_maxioffset); - if (offset + size > mp->m_maxioffset) - size = mp->m_maxioffset - offset; - end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); - offset_fsb = XFS_B_TO_FSBT(mp, offset); - - error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, - XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); - if (error) - goto out_unlock; - - if (create && - (!nimaps || - (imap.br_startblock == HOLESTARTBLOCK || - imap.br_startblock == DELAYSTARTBLOCK))) { - if (direct) { - error = xfs_iomap_write_direct(ip, offset, size, - &imap, nimaps); - } else { - error = xfs_iomap_write_delay(ip, offset, size, &imap); - } - if (error) - goto out_unlock; - - trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); - } else if (nimaps) { - trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); - } else { - trace_xfs_get_blocks_notfound(ip, offset, size); - goto out_unlock; - } - xfs_iunlock(ip, lockmode); - - if (imap.br_startblock != HOLESTARTBLOCK && - imap.br_startblock != DELAYSTARTBLOCK) { - /* - * For unwritten extents do not report a disk address on - * the read case (treat as if we're reading into a hole). - */ - if (create || !ISUNWRITTEN(&imap)) - xfs_map_buffer(inode, bh_result, &imap, offset); - if (create && ISUNWRITTEN(&imap)) { - if (direct) - bh_result->b_private = inode; - set_buffer_unwritten(bh_result); - } - } - - /* - * If this is a realtime file, data may be on a different device. - * to that pointed to from the buffer_head b_bdev currently. - */ - bh_result->b_bdev = xfs_find_bdev_for_inode(inode); - - /* - * If we previously allocated a block out beyond eof and we are now - * coming back to use it then we will need to flag it as new even if it - * has a disk address. - * - * With sub-block writes into unwritten extents we also need to mark - * the buffer as new so that the unwritten parts of the buffer gets - * correctly zeroed. - */ - if (create && - ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || - (offset >= i_size_read(inode)) || - (new || ISUNWRITTEN(&imap)))) - set_buffer_new(bh_result); - - if (imap.br_startblock == DELAYSTARTBLOCK) { - BUG_ON(direct); - if (create) { - set_buffer_uptodate(bh_result); - set_buffer_mapped(bh_result); - set_buffer_delay(bh_result); - } - } - - /* - * If this is O_DIRECT or the mpage code calling tell them how large - * the mapping is, so that we can avoid repeated get_blocks calls. - */ - if (direct || size > (1 << inode->i_blkbits)) { - xfs_off_t mapping_size; - - mapping_size = imap.br_startoff + imap.br_blockcount - iblock; - mapping_size <<= inode->i_blkbits; - - ASSERT(mapping_size > 0); - if (mapping_size > size) - mapping_size = size; - if (mapping_size > LONG_MAX) - mapping_size = LONG_MAX; - - bh_result->b_size = mapping_size; - } - - return 0; - -out_unlock: - xfs_iunlock(ip, lockmode); - return -error; -} - -int -xfs_get_blocks( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - return __xfs_get_blocks(inode, iblock, bh_result, create, 0); -} - -STATIC int -xfs_get_blocks_direct( - struct inode *inode, - sector_t iblock, - struct buffer_head *bh_result, - int create) -{ - return __xfs_get_blocks(inode, iblock, bh_result, create, 1); -} - -/* - * Complete a direct I/O write request. - * - * If the private argument is non-NULL __xfs_get_blocks signals us that we - * need to issue a transaction to convert the range from unwritten to written - * extents. In case this is regular synchronous I/O we just call xfs_end_io - * to do this and we are done. But in case this was a successful AIO - * request this handler is called from interrupt context, from which we - * can't start transactions. In that case offload the I/O completion to - * the workqueues we also use for buffered I/O completion. - */ -STATIC void -xfs_end_io_direct_write( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private, - int ret, - bool is_async) -{ - struct xfs_ioend *ioend = iocb->private; - - /* - * blockdev_direct_IO can return an error even after the I/O - * completion handler was called. Thus we need to protect - * against double-freeing. - */ - iocb->private = NULL; - - ioend->io_offset = offset; - ioend->io_size = size; - if (private && size > 0) - ioend->io_type = IO_UNWRITTEN; - - if (is_async) { - /* - * If we are converting an unwritten extent we need to delay - * the AIO completion until after the unwrittent extent - * conversion has completed, otherwise do it ASAP. - */ - if (ioend->io_type == IO_UNWRITTEN) { - ioend->io_iocb = iocb; - ioend->io_result = ret; - } else { - aio_complete(iocb, ret, 0); - } - xfs_finish_ioend(ioend); - } else { - xfs_finish_ioend_sync(ioend); - } - - /* XXX: probably should move into the real I/O completion handler */ - inode_dio_done(ioend->io_inode); -} - -STATIC ssize_t -xfs_vm_direct_IO( - int rw, - struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) -{ - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct block_device *bdev = xfs_find_bdev_for_inode(inode); - ssize_t ret; - - if (rw & WRITE) { - iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); - - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct_write, NULL, 0); - if (ret != -EIOCBQUEUED && iocb->private) - xfs_destroy_ioend(iocb->private); - } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, - NULL, NULL, 0); - } - - return ret; -} - -STATIC void -xfs_vm_write_failed( - struct address_space *mapping, - loff_t to) -{ - struct inode *inode = mapping->host; - - if (to > inode->i_size) { - /* - * punch out the delalloc blocks we have already allocated. We - * don't call xfs_setattr() to do this as we may be in the - * middle of a multi-iovec write and so the vfs inode->i_size - * will not match the xfs ip->i_size and so it will zero too - * much. Hence we jus truncate the page cache to zero what is - * necessary and punch the delalloc blocks directly. - */ - struct xfs_inode *ip = XFS_I(inode); - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; - int error; - - truncate_pagecache(inode, to, inode->i_size); - - /* - * Check if there are any blocks that are outside of i_size - * that need to be trimmed back. - */ - start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; - end_fsb = XFS_B_TO_FSB(ip->i_mount, to); - if (end_fsb <= start_fsb) - return; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error) { - /* something screwed, just bail */ - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_alert(ip->i_mount, - "xfs_vm_write_failed: unable to clean up ino %lld", - ip->i_ino); - } - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -STATIC int -xfs_vm_write_begin( - struct file *file, - struct address_space *mapping, - loff_t pos, - unsigned len, - unsigned flags, - struct page **pagep, - void **fsdata) -{ - int ret; - - ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, - pagep, xfs_get_blocks); - if (unlikely(ret)) - xfs_vm_write_failed(mapping, pos + len); - return ret; -} - -STATIC int -xfs_vm_write_end( - struct file *file, - struct address_space *mapping, - loff_t pos, - unsigned len, - unsigned copied, - struct page *page, - void *fsdata) -{ - int ret; - - ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - if (unlikely(ret < len)) - xfs_vm_write_failed(mapping, pos + len); - return ret; -} - -STATIC sector_t -xfs_vm_bmap( - struct address_space *mapping, - sector_t block) -{ - struct inode *inode = (struct inode *)mapping->host; - struct xfs_inode *ip = XFS_I(inode); - - trace_xfs_vm_bmap(XFS_I(inode)); - xfs_ilock(ip, XFS_IOLOCK_SHARED); - xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - return generic_block_bmap(mapping, block, xfs_get_blocks); -} - -STATIC int -xfs_vm_readpage( - struct file *unused, - struct page *page) -{ - return mpage_readpage(page, xfs_get_blocks); -} - -STATIC int -xfs_vm_readpages( - struct file *unused, - struct address_space *mapping, - struct list_head *pages, - unsigned nr_pages) -{ - return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); -} - -const struct address_space_operations xfs_address_space_operations = { - .readpage = xfs_vm_readpage, - .readpages = xfs_vm_readpages, - .writepage = xfs_vm_writepage, - .writepages = xfs_vm_writepages, - .releasepage = xfs_vm_releasepage, - .invalidatepage = xfs_vm_invalidatepage, - .write_begin = xfs_vm_write_begin, - .write_end = xfs_vm_write_end, - .bmap = xfs_vm_bmap, - .direct_IO = xfs_vm_direct_IO, - .migratepage = buffer_migrate_page, - .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, -}; diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h deleted file mode 100644 index 71f721e1a71f..000000000000 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2005-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_AOPS_H__ -#define __XFS_AOPS_H__ - -extern struct workqueue_struct *xfsdatad_workqueue; -extern struct workqueue_struct *xfsconvertd_workqueue; -extern mempool_t *xfs_ioend_pool; - -/* - * Types of I/O for bmap clustering and I/O completion tracking. - */ -enum { - IO_DIRECT = 0, /* special case for direct I/O ioends */ - IO_DELALLOC, /* mapping covers delalloc region */ - IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ - IO_OVERWRITE, /* mapping covers already allocated extent */ -}; - -#define XFS_IO_TYPES \ - { 0, "" }, \ - { IO_DELALLOC, "delalloc" }, \ - { IO_UNWRITTEN, "unwritten" }, \ - { IO_OVERWRITE, "overwrite" } - -/* - * xfs_ioend struct manages large extent writes for XFS. - * It can manage several multi-page bio's at once. - */ -typedef struct xfs_ioend { - struct xfs_ioend *io_list; /* next ioend in chain */ - unsigned int io_type; /* delalloc / unwritten */ - int io_error; /* I/O error code */ - atomic_t io_remaining; /* hold count */ - struct inode *io_inode; /* file being written to */ - struct buffer_head *io_buffer_head;/* buffer linked list head */ - struct buffer_head *io_buffer_tail;/* buffer linked list tail */ - size_t io_size; /* size of the extent */ - xfs_off_t io_offset; /* offset in the file */ - struct work_struct io_work; /* xfsdatad work queue */ - struct kiocb *io_iocb; - int io_result; -} xfs_ioend_t; - -extern const struct address_space_operations xfs_address_space_operations; -extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); - -extern void xfs_ioend_init(void); -extern void xfs_ioend_wait(struct xfs_inode *); - -extern void xfs_count_page_state(struct page *, int *, int *); - -#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c deleted file mode 100644 index c57836dc778f..000000000000 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ /dev/null @@ -1,1876 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_trace.h" - -static kmem_zone_t *xfs_buf_zone; -STATIC int xfsbufd(void *); -STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); - -static struct workqueue_struct *xfslogd_workqueue; -struct workqueue_struct *xfsdatad_workqueue; -struct workqueue_struct *xfsconvertd_workqueue; - -#ifdef XFS_BUF_LOCK_TRACKING -# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) -# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) -# define XB_GET_OWNER(bp) ((bp)->b_last_holder) -#else -# define XB_SET_OWNER(bp) do { } while (0) -# define XB_CLEAR_OWNER(bp) do { } while (0) -# define XB_GET_OWNER(bp) do { } while (0) -#endif - -#define xb_to_gfp(flags) \ - ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ - ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) - -#define xb_to_km(flags) \ - (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) - -#define xfs_buf_allocate(flags) \ - kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) -#define xfs_buf_deallocate(bp) \ - kmem_zone_free(xfs_buf_zone, (bp)); - -static inline int -xfs_buf_is_vmapped( - struct xfs_buf *bp) -{ - /* - * Return true if the buffer is vmapped. - * - * The XBF_MAPPED flag is set if the buffer should be mapped, but the - * code is clever enough to know it doesn't have to map a single page, - * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. - */ - return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; -} - -static inline int -xfs_buf_vmap_len( - struct xfs_buf *bp) -{ - return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; -} - -/* - * xfs_buf_lru_add - add a buffer to the LRU. - * - * The LRU takes a new reference to the buffer so that it will only be freed - * once the shrinker takes the buffer off the LRU. - */ -STATIC void -xfs_buf_lru_add( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (list_empty(&bp->b_lru)) { - atomic_inc(&bp->b_hold); - list_add_tail(&bp->b_lru, &btp->bt_lru); - btp->bt_lru_nr++; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * xfs_buf_lru_del - remove a buffer from the LRU - * - * The unlocked check is safe here because it only occurs when there are not - * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there - * to optimise the shrinker removing the buffer from the LRU and calling - * xfs_buf_free(). i.e. it removes an unnecessary round trip on the - * bt_lru_lock. - */ -STATIC void -xfs_buf_lru_del( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - - if (list_empty(&bp->b_lru)) - return; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); -} - -/* - * When we mark a buffer stale, we remove the buffer from the LRU and clear the - * b_lru_ref count so that the buffer is freed immediately when the buffer - * reference count falls to zero. If the buffer is already on the LRU, we need - * to remove the reference that LRU holds on the buffer. - * - * This prevents build-up of stale buffers on the LRU. - */ -void -xfs_buf_stale( - struct xfs_buf *bp) -{ - bp->b_flags |= XBF_STALE; - atomic_set(&(bp)->b_lru_ref, 0); - if (!list_empty(&bp->b_lru)) { - struct xfs_buftarg *btp = bp->b_target; - - spin_lock(&btp->bt_lru_lock); - if (!list_empty(&bp->b_lru)) { - list_del_init(&bp->b_lru); - btp->bt_lru_nr--; - atomic_dec(&bp->b_hold); - } - spin_unlock(&btp->bt_lru_lock); - } - ASSERT(atomic_read(&bp->b_hold) >= 1); -} - -STATIC void -_xfs_buf_initialize( - xfs_buf_t *bp, - xfs_buftarg_t *target, - xfs_off_t range_base, - size_t range_length, - xfs_buf_flags_t flags) -{ - /* - * We don't want certain flags to appear in b_flags. - */ - flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); - - memset(bp, 0, sizeof(xfs_buf_t)); - atomic_set(&bp->b_hold, 1); - atomic_set(&bp->b_lru_ref, 1); - init_completion(&bp->b_iowait); - INIT_LIST_HEAD(&bp->b_lru); - INIT_LIST_HEAD(&bp->b_list); - RB_CLEAR_NODE(&bp->b_rbnode); - sema_init(&bp->b_sema, 0); /* held, no waiters */ - XB_SET_OWNER(bp); - bp->b_target = target; - bp->b_file_offset = range_base; - /* - * Set buffer_length and count_desired to the same value initially. - * I/O routines should use count_desired, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ - bp->b_buffer_length = bp->b_count_desired = range_length; - bp->b_flags = flags; - bp->b_bn = XFS_BUF_DADDR_NULL; - atomic_set(&bp->b_pin_count, 0); - init_waitqueue_head(&bp->b_waiters); - - XFS_STATS_INC(xb_create); - - trace_xfs_buf_init(bp, _RET_IP_); -} - -/* - * Allocate a page array capable of holding a specified number - * of pages, and point the page buf at it. - */ -STATIC int -_xfs_buf_get_pages( - xfs_buf_t *bp, - int page_count, - xfs_buf_flags_t flags) -{ - /* Make sure that we have a page list */ - if (bp->b_pages == NULL) { - bp->b_offset = xfs_buf_poff(bp->b_file_offset); - bp->b_page_count = page_count; - if (page_count <= XB_PAGES) { - bp->b_pages = bp->b_page_array; - } else { - bp->b_pages = kmem_alloc(sizeof(struct page *) * - page_count, xb_to_km(flags)); - if (bp->b_pages == NULL) - return -ENOMEM; - } - memset(bp->b_pages, 0, sizeof(struct page *) * page_count); - } - return 0; -} - -/* - * Frees b_pages if it was allocated. - */ -STATIC void -_xfs_buf_free_pages( - xfs_buf_t *bp) -{ - if (bp->b_pages != bp->b_page_array) { - kmem_free(bp->b_pages); - bp->b_pages = NULL; - } -} - -/* - * Releases the specified buffer. - * - * The modification state of any associated pages is left unchanged. - * The buffer most not be on any hash - use xfs_buf_rele instead for - * hashed and refcounted buffers - */ -void -xfs_buf_free( - xfs_buf_t *bp) -{ - trace_xfs_buf_free(bp, _RET_IP_); - - ASSERT(list_empty(&bp->b_lru)); - - if (bp->b_flags & _XBF_PAGES) { - uint i; - - if (xfs_buf_is_vmapped(bp)) - vm_unmap_ram(bp->b_addr - bp->b_offset, - bp->b_page_count); - - for (i = 0; i < bp->b_page_count; i++) { - struct page *page = bp->b_pages[i]; - - __free_page(page); - } - } else if (bp->b_flags & _XBF_KMEM) - kmem_free(bp->b_addr); - _xfs_buf_free_pages(bp); - xfs_buf_deallocate(bp); -} - -/* - * Allocates all the pages for buffer in question and builds it's page list. - */ -STATIC int -xfs_buf_allocate_memory( - xfs_buf_t *bp, - uint flags) -{ - size_t size = bp->b_count_desired; - size_t nbytes, offset; - gfp_t gfp_mask = xb_to_gfp(flags); - unsigned short page_count, i; - xfs_off_t end; - int error; - - /* - * for buffers that are contained within a single page, just allocate - * the memory from the heap - there's no need for the complexity of - * page arrays to keep allocation down to order 0. - */ - if (bp->b_buffer_length < PAGE_SIZE) { - bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); - if (!bp->b_addr) { - /* low memory - use alloc_page loop instead */ - goto use_alloc_page; - } - - if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & - PAGE_MASK) != - ((unsigned long)bp->b_addr & PAGE_MASK)) { - /* b_addr spans two pages - use alloc_page instead */ - kmem_free(bp->b_addr); - bp->b_addr = NULL; - goto use_alloc_page; - } - bp->b_offset = offset_in_page(bp->b_addr); - bp->b_pages = bp->b_page_array; - bp->b_pages[0] = virt_to_page(bp->b_addr); - bp->b_page_count = 1; - bp->b_flags |= XBF_MAPPED | _XBF_KMEM; - return 0; - } - -use_alloc_page: - end = bp->b_file_offset + bp->b_buffer_length; - page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); - error = _xfs_buf_get_pages(bp, page_count, flags); - if (unlikely(error)) - return error; - - offset = bp->b_offset; - bp->b_flags |= _XBF_PAGES; - - for (i = 0; i < bp->b_page_count; i++) { - struct page *page; - uint retries = 0; -retry: - page = alloc_page(gfp_mask); - if (unlikely(page == NULL)) { - if (flags & XBF_READ_AHEAD) { - bp->b_page_count = i; - error = ENOMEM; - goto out_free_pages; - } - - /* - * This could deadlock. - * - * But until all the XFS lowlevel code is revamped to - * handle buffer allocation failures we can't do much. - */ - if (!(++retries % 100)) - xfs_err(NULL, - "possible memory allocation deadlock in %s (mode:0x%x)", - __func__, gfp_mask); - - XFS_STATS_INC(xb_page_retries); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } - - XFS_STATS_INC(xb_page_found); - - nbytes = min_t(size_t, size, PAGE_SIZE - offset); - size -= nbytes; - bp->b_pages[i] = page; - offset = 0; - } - return 0; - -out_free_pages: - for (i = 0; i < bp->b_page_count; i++) - __free_page(bp->b_pages[i]); - return error; -} - -/* - * Map buffer into kernel address-space if necessary. - */ -STATIC int -_xfs_buf_map_pages( - xfs_buf_t *bp, - uint flags) -{ - ASSERT(bp->b_flags & _XBF_PAGES); - if (bp->b_page_count == 1) { - /* A single page buffer is always mappable */ - bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; - bp->b_flags |= XBF_MAPPED; - } else if (flags & XBF_MAPPED) { - int retried = 0; - - do { - bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); - if (bp->b_addr) - break; - vm_unmap_aliases(); - } while (retried++ <= 1); - - if (!bp->b_addr) - return -ENOMEM; - bp->b_addr += bp->b_offset; - bp->b_flags |= XBF_MAPPED; - } - - return 0; -} - -/* - * Finding and Reading Buffers - */ - -/* - * Look up, and creates if absent, a lockable buffer for - * a given range of an inode. The buffer is returned - * locked. If other overlapping buffers exist, they are - * released before the new buffer is created and locked, - * which may imply that this call will block until those buffers - * are unlocked. No I/O is implied by this call. - */ -xfs_buf_t * -_xfs_buf_find( - xfs_buftarg_t *btp, /* block device target */ - xfs_off_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - xfs_buf_flags_t flags, - xfs_buf_t *new_bp) -{ - xfs_off_t range_base; - size_t range_length; - struct xfs_perag *pag; - struct rb_node **rbp; - struct rb_node *parent; - xfs_buf_t *bp; - - range_base = (ioff << BBSHIFT); - range_length = (isize << BBSHIFT); - - /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(range_length < (1 << btp->bt_sshift))); - ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); - - /* get tree root */ - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, ioff)); - - /* walk tree */ - spin_lock(&pag->pag_buf_lock); - rbp = &pag->pag_buf_tree.rb_node; - parent = NULL; - bp = NULL; - while (*rbp) { - parent = *rbp; - bp = rb_entry(parent, struct xfs_buf, b_rbnode); - - if (range_base < bp->b_file_offset) - rbp = &(*rbp)->rb_left; - else if (range_base > bp->b_file_offset) - rbp = &(*rbp)->rb_right; - else { - /* - * found a block offset match. If the range doesn't - * match, the only way this is allowed is if the buffer - * in the cache is stale and the transaction that made - * it stale has not yet committed. i.e. we are - * reallocating a busy extent. Skip this buffer and - * continue searching to the right for an exact match. - */ - if (bp->b_buffer_length != range_length) { - ASSERT(bp->b_flags & XBF_STALE); - rbp = &(*rbp)->rb_right; - continue; - } - atomic_inc(&bp->b_hold); - goto found; - } - } - - /* No match found */ - if (new_bp) { - _xfs_buf_initialize(new_bp, btp, range_base, - range_length, flags); - rb_link_node(&new_bp->b_rbnode, parent, rbp); - rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - spin_unlock(&pag->pag_buf_lock); - } else { - XFS_STATS_INC(xb_miss_locked); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - } - return new_bp; - -found: - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { - xfs_buf_rele(bp); - XFS_STATS_INC(xb_busy_locked); - return NULL; - } - xfs_buf_lock(bp); - XFS_STATS_INC(xb_get_locked_waited); - } - - /* - * if the buffer is stale, clear all the external state associated with - * it. We need to keep flags such as how we allocated the buffer memory - * intact here. - */ - if (bp->b_flags & XBF_STALE) { - ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; - } - - trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(xb_get_locked); - return bp; -} - -/* - * Assembles a buffer covering the specified range. - * Storage in memory for all portions of the buffer will be allocated, - * although backing storage may not be. - */ -xfs_buf_t * -xfs_buf_get( - xfs_buftarg_t *target,/* target for buffer */ - xfs_off_t ioff, /* starting offset of range */ - size_t isize, /* length of range */ - xfs_buf_flags_t flags) -{ - xfs_buf_t *bp, *new_bp; - int error = 0; - - new_bp = xfs_buf_allocate(flags); - if (unlikely(!new_bp)) - return NULL; - - bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); - if (bp == new_bp) { - error = xfs_buf_allocate_memory(bp, flags); - if (error) - goto no_buffer; - } else { - xfs_buf_deallocate(new_bp); - if (unlikely(bp == NULL)) - return NULL; - } - - if (!(bp->b_flags & XBF_MAPPED)) { - error = _xfs_buf_map_pages(bp, flags); - if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); - goto no_buffer; - } - } - - XFS_STATS_INC(xb_get); - - /* - * Always fill in the block number now, the mapped cases can do - * their own overlay of this later. - */ - bp->b_bn = ioff; - bp->b_count_desired = bp->b_buffer_length; - - trace_xfs_buf_get(bp, flags, _RET_IP_); - return bp; - - no_buffer: - if (flags & (XBF_LOCK | XBF_TRYLOCK)) - xfs_buf_unlock(bp); - xfs_buf_rele(bp); - return NULL; -} - -STATIC int -_xfs_buf_read( - xfs_buf_t *bp, - xfs_buf_flags_t flags) -{ - int status; - - ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); - ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - - bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); - bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); - - status = xfs_buf_iorequest(bp); - if (status || bp->b_error || (flags & XBF_ASYNC)) - return status; - return xfs_buf_iowait(bp); -} - -xfs_buf_t * -xfs_buf_read( - xfs_buftarg_t *target, - xfs_off_t ioff, - size_t isize, - xfs_buf_flags_t flags) -{ - xfs_buf_t *bp; - - flags |= XBF_READ; - - bp = xfs_buf_get(target, ioff, isize, flags); - if (bp) { - trace_xfs_buf_read(bp, flags, _RET_IP_); - - if (!XFS_BUF_ISDONE(bp)) { - XFS_STATS_INC(xb_get_read); - _xfs_buf_read(bp, flags); - } else if (flags & XBF_ASYNC) { - /* - * Read ahead call which is already satisfied, - * drop the buffer - */ - goto no_buffer; - } else { - /* We do not want read in the flags */ - bp->b_flags &= ~XBF_READ; - } - } - - return bp; - - no_buffer: - if (flags & (XBF_LOCK | XBF_TRYLOCK)) - xfs_buf_unlock(bp); - xfs_buf_rele(bp); - return NULL; -} - -/* - * If we are not low on memory then do the readahead in a deadlock - * safe manner. - */ -void -xfs_buf_readahead( - xfs_buftarg_t *target, - xfs_off_t ioff, - size_t isize) -{ - if (bdi_read_congested(target->bt_bdi)) - return; - - xfs_buf_read(target, ioff, isize, - XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); -} - -/* - * Read an uncached buffer from disk. Allocates and returns a locked - * buffer containing the disk contents or nothing. - */ -struct xfs_buf * -xfs_buf_read_uncached( - struct xfs_mount *mp, - struct xfs_buftarg *target, - xfs_daddr_t daddr, - size_t length, - int flags) -{ - xfs_buf_t *bp; - int error; - - bp = xfs_buf_get_uncached(target, length, flags); - if (!bp) - return NULL; - - /* set up the buffer for a read IO */ - XFS_BUF_SET_ADDR(bp, daddr); - XFS_BUF_READ(bp); - - xfsbdstrat(mp, bp); - error = xfs_buf_iowait(bp); - if (error || bp->b_error) { - xfs_buf_relse(bp); - return NULL; - } - return bp; -} - -xfs_buf_t * -xfs_buf_get_empty( - size_t len, - xfs_buftarg_t *target) -{ - xfs_buf_t *bp; - - bp = xfs_buf_allocate(0); - if (bp) - _xfs_buf_initialize(bp, target, 0, len, 0); - return bp; -} - -/* - * Return a buffer allocated as an empty buffer and associated to external - * memory via xfs_buf_associate_memory() back to it's empty state. - */ -void -xfs_buf_set_empty( - struct xfs_buf *bp, - size_t len) -{ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_page_count = 0; - bp->b_addr = NULL; - bp->b_file_offset = 0; - bp->b_buffer_length = bp->b_count_desired = len; - bp->b_bn = XFS_BUF_DADDR_NULL; - bp->b_flags &= ~XBF_MAPPED; -} - -static inline struct page * -mem_to_page( - void *addr) -{ - if ((!is_vmalloc_addr(addr))) { - return virt_to_page(addr); - } else { - return vmalloc_to_page(addr); - } -} - -int -xfs_buf_associate_memory( - xfs_buf_t *bp, - void *mem, - size_t len) -{ - int rval; - int i = 0; - unsigned long pageaddr; - unsigned long offset; - size_t buflen; - int page_count; - - pageaddr = (unsigned long)mem & PAGE_MASK; - offset = (unsigned long)mem - pageaddr; - buflen = PAGE_ALIGN(len + offset); - page_count = buflen >> PAGE_SHIFT; - - /* Free any previous set of page pointers */ - if (bp->b_pages) - _xfs_buf_free_pages(bp); - - bp->b_pages = NULL; - bp->b_addr = mem; - - rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); - if (rval) - return rval; - - bp->b_offset = offset; - - for (i = 0; i < bp->b_page_count; i++) { - bp->b_pages[i] = mem_to_page((void *)pageaddr); - pageaddr += PAGE_SIZE; - } - - bp->b_count_desired = len; - bp->b_buffer_length = buflen; - bp->b_flags |= XBF_MAPPED; - - return 0; -} - -xfs_buf_t * -xfs_buf_get_uncached( - struct xfs_buftarg *target, - size_t len, - int flags) -{ - unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; - int error, i; - xfs_buf_t *bp; - - bp = xfs_buf_allocate(0); - if (unlikely(bp == NULL)) - goto fail; - _xfs_buf_initialize(bp, target, 0, len, 0); - - error = _xfs_buf_get_pages(bp, page_count, 0); - if (error) - goto fail_free_buf; - - for (i = 0; i < page_count; i++) { - bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); - if (!bp->b_pages[i]) - goto fail_free_mem; - } - bp->b_flags |= _XBF_PAGES; - - error = _xfs_buf_map_pages(bp, XBF_MAPPED); - if (unlikely(error)) { - xfs_warn(target->bt_mount, - "%s: failed to map pages\n", __func__); - goto fail_free_mem; - } - - trace_xfs_buf_get_uncached(bp, _RET_IP_); - return bp; - - fail_free_mem: - while (--i >= 0) - __free_page(bp->b_pages[i]); - _xfs_buf_free_pages(bp); - fail_free_buf: - xfs_buf_deallocate(bp); - fail: - return NULL; -} - -/* - * Increment reference count on buffer, to hold the buffer concurrently - * with another thread which may release (free) the buffer asynchronously. - * Must hold the buffer already to call this function. - */ -void -xfs_buf_hold( - xfs_buf_t *bp) -{ - trace_xfs_buf_hold(bp, _RET_IP_); - atomic_inc(&bp->b_hold); -} - -/* - * Releases a hold on the specified buffer. If the - * the hold count is 1, calls xfs_buf_free. - */ -void -xfs_buf_rele( - xfs_buf_t *bp) -{ - struct xfs_perag *pag = bp->b_pag; - - trace_xfs_buf_rele(bp, _RET_IP_); - - if (!pag) { - ASSERT(list_empty(&bp->b_lru)); - ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); - if (atomic_dec_and_test(&bp->b_hold)) - xfs_buf_free(bp); - return; - } - - ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); - - ASSERT(atomic_read(&bp->b_hold) > 0); - if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { - if (!(bp->b_flags & XBF_STALE) && - atomic_read(&bp->b_lru_ref)) { - xfs_buf_lru_add(bp); - spin_unlock(&pag->pag_buf_lock); - } else { - xfs_buf_lru_del(bp); - ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); - rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - xfs_buf_free(bp); - } - } -} - - -/* - * Lock a buffer object, if it is not already locked. - * - * If we come across a stale, pinned, locked buffer, we know that we are - * being asked to lock a buffer that has been reallocated. Because it is - * pinned, we know that the log has not been pushed to disk and hence it - * will still be locked. Rather than continuing to have trylock attempts - * fail until someone else pushes the log, push it ourselves before - * returning. This means that the xfsaild will not get stuck trying - * to push on stale inode buffers. - */ -int -xfs_buf_trylock( - struct xfs_buf *bp) -{ - int locked; - - locked = down_trylock(&bp->b_sema) == 0; - if (locked) - XB_SET_OWNER(bp); - else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); - - trace_xfs_buf_trylock(bp, _RET_IP_); - return locked; -} - -/* - * Lock a buffer object. - * - * If we come across a stale, pinned, locked buffer, we know that we - * are being asked to lock a buffer that has been reallocated. Because - * it is pinned, we know that the log has not been pushed to disk and - * hence it will still be locked. Rather than sleeping until someone - * else pushes the log, push it ourselves before trying to get the lock. - */ -void -xfs_buf_lock( - struct xfs_buf *bp) -{ - trace_xfs_buf_lock(bp, _RET_IP_); - - if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) - xfs_log_force(bp->b_target->bt_mount, 0); - down(&bp->b_sema); - XB_SET_OWNER(bp); - - trace_xfs_buf_lock_done(bp, _RET_IP_); -} - -/* - * Releases the lock on the buffer object. - * If the buffer is marked delwri but is not queued, do so before we - * unlock the buffer as we need to set flags correctly. We also need to - * take a reference for the delwri queue because the unlocker is going to - * drop their's and they don't know we just queued it. - */ -void -xfs_buf_unlock( - struct xfs_buf *bp) -{ - if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { - atomic_inc(&bp->b_hold); - bp->b_flags |= XBF_ASYNC; - xfs_buf_delwri_queue(bp, 0); - } - - XB_CLEAR_OWNER(bp); - up(&bp->b_sema); - - trace_xfs_buf_unlock(bp, _RET_IP_); -} - -STATIC void -xfs_buf_wait_unpin( - xfs_buf_t *bp) -{ - DECLARE_WAITQUEUE (wait, current); - - if (atomic_read(&bp->b_pin_count) == 0) - return; - - add_wait_queue(&bp->b_waiters, &wait); - for (;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&bp->b_pin_count) == 0) - break; - io_schedule(); - } - remove_wait_queue(&bp->b_waiters, &wait); - set_current_state(TASK_RUNNING); -} - -/* - * Buffer Utility Routines - */ - -STATIC void -xfs_buf_iodone_work( - struct work_struct *work) -{ - xfs_buf_t *bp = - container_of(work, xfs_buf_t, b_iodone_work); - - if (bp->b_iodone) - (*(bp->b_iodone))(bp); - else if (bp->b_flags & XBF_ASYNC) - xfs_buf_relse(bp); -} - -void -xfs_buf_ioend( - xfs_buf_t *bp, - int schedule) -{ - trace_xfs_buf_iodone(bp, _RET_IP_); - - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); - if (bp->b_error == 0) - bp->b_flags |= XBF_DONE; - - if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { - if (schedule) { - INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); - queue_work(xfslogd_workqueue, &bp->b_iodone_work); - } else { - xfs_buf_iodone_work(&bp->b_iodone_work); - } - } else { - complete(&bp->b_iowait); - } -} - -void -xfs_buf_ioerror( - xfs_buf_t *bp, - int error) -{ - ASSERT(error >= 0 && error <= 0xffff); - bp->b_error = (unsigned short)error; - trace_xfs_buf_ioerror(bp, error, _RET_IP_); -} - -int -xfs_bwrite( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - int error; - - bp->b_flags |= XBF_WRITE; - bp->b_flags &= ~(XBF_ASYNC | XBF_READ); - - xfs_buf_delwri_dequeue(bp); - xfs_bdstrat_cb(bp); - - error = xfs_buf_iowait(bp); - if (error) - xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); - xfs_buf_relse(bp); - return error; -} - -void -xfs_bdwrite( - void *mp, - struct xfs_buf *bp) -{ - trace_xfs_buf_bdwrite(bp, _RET_IP_); - - bp->b_flags &= ~XBF_READ; - bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); - - xfs_buf_delwri_queue(bp, 1); -} - -/* - * Called when we want to stop a buffer from getting written or read. - * We attach the EIO error, muck with its flags, and call xfs_buf_ioend - * so that the proper iodone callbacks get called. - */ -STATIC int -xfs_bioerror( - xfs_buf_t *bp) -{ -#ifdef XFSERRORDEBUG - ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); -#endif - - /* - * No need to wait until the buffer is unpinned, we aren't flushing it. - */ - xfs_buf_ioerror(bp, EIO); - - /* - * We're calling xfs_buf_ioend, so delete XBF_DONE flag. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_UNDONE(bp); - XFS_BUF_STALE(bp); - - xfs_buf_ioend(bp, 0); - - return EIO; -} - -/* - * Same as xfs_bioerror, except that we are releasing the buffer - * here ourselves, and avoiding the xfs_buf_ioend call. - * This is meant for userdata errors; metadata bufs come with - * iodone functions attached, so that we can track down errors. - */ -STATIC int -xfs_bioerror_relse( - struct xfs_buf *bp) -{ - int64_t fl = bp->b_flags; - /* - * No need to wait until the buffer is unpinned. - * We aren't flushing it. - * - * chunkhold expects B_DONE to be set, whether - * we actually finish the I/O or not. We don't want to - * change that interface. - */ - XFS_BUF_UNREAD(bp); - XFS_BUF_UNDELAYWRITE(bp); - XFS_BUF_DONE(bp); - XFS_BUF_STALE(bp); - bp->b_iodone = NULL; - if (!(fl & XBF_ASYNC)) { - /* - * Mark b_error and B_ERROR _both_. - * Lot's of chunkcache code assumes that. - * There's no reason to mark error for - * ASYNC buffers. - */ - xfs_buf_ioerror(bp, EIO); - XFS_BUF_FINISH_IOWAIT(bp); - } else { - xfs_buf_relse(bp); - } - - return EIO; -} - - -/* - * All xfs metadata buffers except log state machine buffers - * get this attached as their b_bdstrat callback function. - * This is so that we can catch a buffer - * after prematurely unpinning it to forcibly shutdown the filesystem. - */ -int -xfs_bdstrat_cb( - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - /* - * Metadata write that didn't get logged but - * written delayed anyway. These aren't associated - * with a transaction, and can be ignored. - */ - if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) - return xfs_bioerror_relse(bp); - else - return xfs_bioerror(bp); - } - - xfs_buf_iorequest(bp); - return 0; -} - -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem. Typically user data goes thru this - * path; one of the exceptions is the superblock. - */ -void -xfsbdstrat( - struct xfs_mount *mp, - struct xfs_buf *bp) -{ - if (XFS_FORCED_SHUTDOWN(mp)) { - trace_xfs_bdstrat_shut(bp, _RET_IP_); - xfs_bioerror_relse(bp); - return; - } - - xfs_buf_iorequest(bp); -} - -STATIC void -_xfs_buf_ioend( - xfs_buf_t *bp, - int schedule) -{ - if (atomic_dec_and_test(&bp->b_io_remaining) == 1) - xfs_buf_ioend(bp, schedule); -} - -STATIC void -xfs_buf_bio_end_io( - struct bio *bio, - int error) -{ - xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; - - xfs_buf_ioerror(bp, -error); - - if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) - invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); - - _xfs_buf_ioend(bp, 1); - bio_put(bio); -} - -STATIC void -_xfs_buf_ioapply( - xfs_buf_t *bp) -{ - int rw, map_i, total_nr_pages, nr_pages; - struct bio *bio; - int offset = bp->b_offset; - int size = bp->b_count_desired; - sector_t sector = bp->b_bn; - - total_nr_pages = bp->b_page_count; - map_i = 0; - - if (bp->b_flags & XBF_WRITE) { - if (bp->b_flags & XBF_SYNCIO) - rw = WRITE_SYNC; - else - rw = WRITE; - if (bp->b_flags & XBF_FUA) - rw |= REQ_FUA; - if (bp->b_flags & XBF_FLUSH) - rw |= REQ_FLUSH; - } else if (bp->b_flags & XBF_READ_AHEAD) { - rw = READA; - } else { - rw = READ; - } - - /* we only use the buffer cache for meta-data */ - rw |= REQ_META; - -next_chunk: - atomic_inc(&bp->b_io_remaining); - nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); - if (nr_pages > total_nr_pages) - nr_pages = total_nr_pages; - - bio = bio_alloc(GFP_NOIO, nr_pages); - bio->bi_bdev = bp->b_target->bt_bdev; - bio->bi_sector = sector; - bio->bi_end_io = xfs_buf_bio_end_io; - bio->bi_private = bp; - - - for (; size && nr_pages; nr_pages--, map_i++) { - int rbytes, nbytes = PAGE_SIZE - offset; - - if (nbytes > size) - nbytes = size; - - rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); - if (rbytes < nbytes) - break; - - offset = 0; - sector += nbytes >> BBSHIFT; - size -= nbytes; - total_nr_pages--; - } - - if (likely(bio->bi_size)) { - if (xfs_buf_is_vmapped(bp)) { - flush_kernel_vmap_range(bp->b_addr, - xfs_buf_vmap_len(bp)); - } - submit_bio(rw, bio); - if (size) - goto next_chunk; - } else { - xfs_buf_ioerror(bp, EIO); - bio_put(bio); - } -} - -int -xfs_buf_iorequest( - xfs_buf_t *bp) -{ - trace_xfs_buf_iorequest(bp, _RET_IP_); - - if (bp->b_flags & XBF_DELWRI) { - xfs_buf_delwri_queue(bp, 1); - return 0; - } - - if (bp->b_flags & XBF_WRITE) { - xfs_buf_wait_unpin(bp); - } - - xfs_buf_hold(bp); - - /* Set the count to 1 initially, this will stop an I/O - * completion callout which happens before we have started - * all the I/O from calling xfs_buf_ioend too early. - */ - atomic_set(&bp->b_io_remaining, 1); - _xfs_buf_ioapply(bp); - _xfs_buf_ioend(bp, 0); - - xfs_buf_rele(bp); - return 0; -} - -/* - * Waits for I/O to complete on the buffer supplied. - * It returns immediately if no I/O is pending. - * It returns the I/O error code, if any, or 0 if there was no error. - */ -int -xfs_buf_iowait( - xfs_buf_t *bp) -{ - trace_xfs_buf_iowait(bp, _RET_IP_); - - wait_for_completion(&bp->b_iowait); - - trace_xfs_buf_iowait_done(bp, _RET_IP_); - return bp->b_error; -} - -xfs_caddr_t -xfs_buf_offset( - xfs_buf_t *bp, - size_t offset) -{ - struct page *page; - - if (bp->b_flags & XBF_MAPPED) - return bp->b_addr + offset; - - offset += bp->b_offset; - page = bp->b_pages[offset >> PAGE_SHIFT]; - return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); -} - -/* - * Move data into or out of a buffer. - */ -void -xfs_buf_iomove( - xfs_buf_t *bp, /* buffer to process */ - size_t boff, /* starting buffer offset */ - size_t bsize, /* length to copy */ - void *data, /* data address */ - xfs_buf_rw_t mode) /* read/write/zero flag */ -{ - size_t bend, cpoff, csize; - struct page *page; - - bend = boff + bsize; - while (boff < bend) { - page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; - cpoff = xfs_buf_poff(boff + bp->b_offset); - csize = min_t(size_t, - PAGE_SIZE-cpoff, bp->b_count_desired-boff); - - ASSERT(((csize + cpoff) <= PAGE_SIZE)); - - switch (mode) { - case XBRW_ZERO: - memset(page_address(page) + cpoff, 0, csize); - break; - case XBRW_READ: - memcpy(data, page_address(page) + cpoff, csize); - break; - case XBRW_WRITE: - memcpy(page_address(page) + cpoff, data, csize); - } - - boff += csize; - data += csize; - } -} - -/* - * Handling of buffer targets (buftargs). - */ - -/* - * Wait for any bufs with callbacks that have been submitted but have not yet - * returned. These buffers will have an elevated hold count, so wait on those - * while freeing all the buffers only held by the LRU. - */ -void -xfs_wait_buftarg( - struct xfs_buftarg *btp) -{ - struct xfs_buf *bp; - -restart: - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - if (atomic_read(&bp->b_hold) > 1) { - spin_unlock(&btp->bt_lru_lock); - delay(100); - goto restart; - } - /* - * clear the LRU reference count so the bufer doesn't get - * ignored in xfs_buf_rele(). - */ - atomic_set(&bp->b_lru_ref, 0); - spin_unlock(&btp->bt_lru_lock); - xfs_buf_rele(bp); - spin_lock(&btp->bt_lru_lock); - } - spin_unlock(&btp->bt_lru_lock); -} - -int -xfs_buftarg_shrink( - struct shrinker *shrink, - struct shrink_control *sc) -{ - struct xfs_buftarg *btp = container_of(shrink, - struct xfs_buftarg, bt_shrinker); - struct xfs_buf *bp; - int nr_to_scan = sc->nr_to_scan; - LIST_HEAD(dispose); - - if (!nr_to_scan) - return btp->bt_lru_nr; - - spin_lock(&btp->bt_lru_lock); - while (!list_empty(&btp->bt_lru)) { - if (nr_to_scan-- <= 0) - break; - - bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); - - /* - * Decrement the b_lru_ref count unless the value is already - * zero. If the value is already zero, we need to reclaim the - * buffer, otherwise it gets another trip through the LRU. - */ - if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { - list_move_tail(&bp->b_lru, &btp->bt_lru); - continue; - } - - /* - * remove the buffer from the LRU now to avoid needing another - * lock round trip inside xfs_buf_rele(). - */ - list_move(&bp->b_lru, &dispose); - btp->bt_lru_nr--; - } - spin_unlock(&btp->bt_lru_lock); - - while (!list_empty(&dispose)) { - bp = list_first_entry(&dispose, struct xfs_buf, b_lru); - list_del_init(&bp->b_lru); - xfs_buf_rele(bp); - } - - return btp->bt_lru_nr; -} - -void -xfs_free_buftarg( - struct xfs_mount *mp, - struct xfs_buftarg *btp) -{ - unregister_shrinker(&btp->bt_shrinker); - - xfs_flush_buftarg(btp, 1); - if (mp->m_flags & XFS_MOUNT_BARRIER) - xfs_blkdev_issue_flush(btp); - - kthread_stop(btp->bt_task); - kmem_free(btp); -} - -STATIC int -xfs_setsize_buftarg_flags( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize, - int verbose) -{ - btp->bt_bsize = blocksize; - btp->bt_sshift = ffs(sectorsize) - 1; - btp->bt_smask = sectorsize - 1; - - if (set_blocksize(btp->bt_bdev, sectorsize)) { - xfs_warn(btp->bt_mount, - "Cannot set_blocksize to %u on device %s\n", - sectorsize, xfs_buf_target_name(btp)); - return EINVAL; - } - - return 0; -} - -/* - * When allocating the initial buffer target we have not yet - * read in the superblock, so don't know what sized sectors - * are being used is at this early stage. Play safe. - */ -STATIC int -xfs_setsize_buftarg_early( - xfs_buftarg_t *btp, - struct block_device *bdev) -{ - return xfs_setsize_buftarg_flags(btp, - PAGE_SIZE, bdev_logical_block_size(bdev), 0); -} - -int -xfs_setsize_buftarg( - xfs_buftarg_t *btp, - unsigned int blocksize, - unsigned int sectorsize) -{ - return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); -} - -STATIC int -xfs_alloc_delwrite_queue( - xfs_buftarg_t *btp, - const char *fsname) -{ - INIT_LIST_HEAD(&btp->bt_delwrite_queue); - spin_lock_init(&btp->bt_delwrite_lock); - btp->bt_flags = 0; - btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); - if (IS_ERR(btp->bt_task)) - return PTR_ERR(btp->bt_task); - return 0; -} - -xfs_buftarg_t * -xfs_alloc_buftarg( - struct xfs_mount *mp, - struct block_device *bdev, - int external, - const char *fsname) -{ - xfs_buftarg_t *btp; - - btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); - - btp->bt_mount = mp; - btp->bt_dev = bdev->bd_dev; - btp->bt_bdev = bdev; - btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; - - INIT_LIST_HEAD(&btp->bt_lru); - spin_lock_init(&btp->bt_lru_lock); - if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; - if (xfs_alloc_delwrite_queue(btp, fsname)) - goto error; - btp->bt_shrinker.shrink = xfs_buftarg_shrink; - btp->bt_shrinker.seeks = DEFAULT_SEEKS; - register_shrinker(&btp->bt_shrinker); - return btp; - -error: - kmem_free(btp); - return NULL; -} - - -/* - * Delayed write buffer handling - */ -STATIC void -xfs_buf_delwri_queue( - xfs_buf_t *bp, - int unlock) -{ - struct list_head *dwq = &bp->b_target->bt_delwrite_queue; - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; - - trace_xfs_buf_delwri_queue(bp, _RET_IP_); - - ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); - - spin_lock(dwlk); - /* If already in the queue, dequeue and place at tail */ - if (!list_empty(&bp->b_list)) { - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - if (unlock) - atomic_dec(&bp->b_hold); - list_del(&bp->b_list); - } - - if (list_empty(dwq)) { - /* start xfsbufd as it is about to have something to do */ - wake_up_process(bp->b_target->bt_task); - } - - bp->b_flags |= _XBF_DELWRI_Q; - list_add_tail(&bp->b_list, dwq); - bp->b_queuetime = jiffies; - spin_unlock(dwlk); - - if (unlock) - xfs_buf_unlock(bp); -} - -void -xfs_buf_delwri_dequeue( - xfs_buf_t *bp) -{ - spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; - int dequeued = 0; - - spin_lock(dwlk); - if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - list_del_init(&bp->b_list); - dequeued = 1; - } - bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); - spin_unlock(dwlk); - - if (dequeued) - xfs_buf_rele(bp); - - trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); -} - -/* - * If a delwri buffer needs to be pushed before it has aged out, then promote - * it to the head of the delwri queue so that it will be flushed on the next - * xfsbufd run. We do this by resetting the queuetime of the buffer to be older - * than the age currently needed to flush the buffer. Hence the next time the - * xfsbufd sees it is guaranteed to be considered old enough to flush. - */ -void -xfs_buf_delwri_promote( - struct xfs_buf *bp) -{ - struct xfs_buftarg *btp = bp->b_target; - long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; - - ASSERT(bp->b_flags & XBF_DELWRI); - ASSERT(bp->b_flags & _XBF_DELWRI_Q); - - /* - * Check the buffer age before locking the delayed write queue as we - * don't need to promote buffers that are already past the flush age. - */ - if (bp->b_queuetime < jiffies - age) - return; - bp->b_queuetime = jiffies - age; - spin_lock(&btp->bt_delwrite_lock); - list_move(&bp->b_list, &btp->bt_delwrite_queue); - spin_unlock(&btp->bt_delwrite_lock); -} - -STATIC void -xfs_buf_runall_queues( - struct workqueue_struct *queue) -{ - flush_workqueue(queue); -} - -/* - * Move as many buffers as specified to the supplied list - * idicating if we skipped any buffers to prevent deadlocks. - */ -STATIC int -xfs_buf_delwri_split( - xfs_buftarg_t *target, - struct list_head *list, - unsigned long age) -{ - xfs_buf_t *bp, *n; - struct list_head *dwq = &target->bt_delwrite_queue; - spinlock_t *dwlk = &target->bt_delwrite_lock; - int skipped = 0; - int force; - - force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); - INIT_LIST_HEAD(list); - spin_lock(dwlk); - list_for_each_entry_safe(bp, n, dwq, b_list) { - ASSERT(bp->b_flags & XBF_DELWRI); - - if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { - if (!force && - time_before(jiffies, bp->b_queuetime + age)) { - xfs_buf_unlock(bp); - break; - } - - bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); - bp->b_flags |= XBF_WRITE; - list_move_tail(&bp->b_list, list); - trace_xfs_buf_delwri_split(bp, _RET_IP_); - } else - skipped++; - } - spin_unlock(dwlk); - - return skipped; - -} - -/* - * Compare function is more complex than it needs to be because - * the return value is only 32 bits and we are doing comparisons - * on 64 bit values - */ -static int -xfs_buf_cmp( - void *priv, - struct list_head *a, - struct list_head *b) -{ - struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); - struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); - xfs_daddr_t diff; - - diff = ap->b_bn - bp->b_bn; - if (diff < 0) - return -1; - if (diff > 0) - return 1; - return 0; -} - -STATIC int -xfsbufd( - void *data) -{ - xfs_buftarg_t *target = (xfs_buftarg_t *)data; - - current->flags |= PF_MEMALLOC; - - set_freezable(); - - do { - long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); - long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); - struct list_head tmp; - struct blk_plug plug; - - if (unlikely(freezing(current))) { - set_bit(XBT_FORCE_SLEEP, &target->bt_flags); - refrigerator(); - } else { - clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); - } - - /* sleep for a long time if there is nothing to do. */ - if (list_empty(&target->bt_delwrite_queue)) - tout = MAX_SCHEDULE_TIMEOUT; - schedule_timeout_interruptible(tout); - - xfs_buf_delwri_split(target, &tmp, age); - list_sort(NULL, &tmp, xfs_buf_cmp); - - blk_start_plug(&plug); - while (!list_empty(&tmp)) { - struct xfs_buf *bp; - bp = list_first_entry(&tmp, struct xfs_buf, b_list); - list_del_init(&bp->b_list); - xfs_bdstrat_cb(bp); - } - blk_finish_plug(&plug); - } while (!kthread_should_stop()); - - return 0; -} - -/* - * Go through all incore buffers, and release buffers if they belong to - * the given device. This is used in filesystem error handling to - * preserve the consistency of its metadata. - */ -int -xfs_flush_buftarg( - xfs_buftarg_t *target, - int wait) -{ - xfs_buf_t *bp; - int pincount = 0; - LIST_HEAD(tmp_list); - LIST_HEAD(wait_list); - struct blk_plug plug; - - xfs_buf_runall_queues(xfsconvertd_workqueue); - xfs_buf_runall_queues(xfsdatad_workqueue); - xfs_buf_runall_queues(xfslogd_workqueue); - - set_bit(XBT_FORCE_FLUSH, &target->bt_flags); - pincount = xfs_buf_delwri_split(target, &tmp_list, 0); - - /* - * Dropped the delayed write list lock, now walk the temporary list. - * All I/O is issued async and then if we need to wait for completion - * we do that after issuing all the IO. - */ - list_sort(NULL, &tmp_list, xfs_buf_cmp); - - blk_start_plug(&plug); - while (!list_empty(&tmp_list)) { - bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); - ASSERT(target == bp->b_target); - list_del_init(&bp->b_list); - if (wait) { - bp->b_flags &= ~XBF_ASYNC; - list_add(&bp->b_list, &wait_list); - } - xfs_bdstrat_cb(bp); - } - blk_finish_plug(&plug); - - if (wait) { - /* Wait for IO to complete. */ - while (!list_empty(&wait_list)) { - bp = list_first_entry(&wait_list, struct xfs_buf, b_list); - - list_del_init(&bp->b_list); - xfs_buf_iowait(bp); - xfs_buf_relse(bp); - } - } - - return pincount; -} - -int __init -xfs_buf_init(void) -{ - xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", - KM_ZONE_HWALIGN, NULL); - if (!xfs_buf_zone) - goto out; - - xfslogd_workqueue = alloc_workqueue("xfslogd", - WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); - if (!xfslogd_workqueue) - goto out_free_buf_zone; - - xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); - if (!xfsdatad_workqueue) - goto out_destroy_xfslogd_workqueue; - - xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", - WQ_MEM_RECLAIM, 1); - if (!xfsconvertd_workqueue) - goto out_destroy_xfsdatad_workqueue; - - return 0; - - out_destroy_xfsdatad_workqueue: - destroy_workqueue(xfsdatad_workqueue); - out_destroy_xfslogd_workqueue: - destroy_workqueue(xfslogd_workqueue); - out_free_buf_zone: - kmem_zone_destroy(xfs_buf_zone); - out: - return -ENOMEM; -} - -void -xfs_buf_terminate(void) -{ - destroy_workqueue(xfsconvertd_workqueue); - destroy_workqueue(xfsdatad_workqueue); - destroy_workqueue(xfslogd_workqueue); - kmem_zone_destroy(xfs_buf_zone); -} - -#ifdef CONFIG_KDB_MODULES -struct list_head * -xfs_get_buftarg_list(void) -{ - return &xfs_buftarg_list; -} -#endif diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h deleted file mode 100644 index 620972b8094d..000000000000 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ /dev/null @@ -1,326 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_BUF_H__ -#define __XFS_BUF_H__ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Base types - */ - -#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) - -#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) -#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) -#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) -#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) - -typedef enum { - XBRW_READ = 1, /* transfer into target memory */ - XBRW_WRITE = 2, /* transfer from target memory */ - XBRW_ZERO = 3, /* Zero target memory */ -} xfs_buf_rw_t; - -#define XBF_READ (1 << 0) /* buffer intended for reading from device */ -#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ -#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ -#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ -#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ -#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ -#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ -#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ - -/* I/O hints for the BIO layer */ -#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ -#define XBF_FUA (1 << 11)/* force cache write through mode */ -#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ - -/* flags used only as arguments to access routines */ -#define XBF_LOCK (1 << 15)/* lock requested */ -#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ -#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ - -/* flags used only internally */ -#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ -#define _XBF_KMEM (1 << 21)/* backed by heap memory */ -#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ - -typedef unsigned int xfs_buf_flags_t; - -#define XFS_BUF_FLAGS \ - { XBF_READ, "READ" }, \ - { XBF_WRITE, "WRITE" }, \ - { XBF_READ_AHEAD, "READ_AHEAD" }, \ - { XBF_MAPPED, "MAPPED" }, \ - { XBF_ASYNC, "ASYNC" }, \ - { XBF_DONE, "DONE" }, \ - { XBF_DELWRI, "DELWRI" }, \ - { XBF_STALE, "STALE" }, \ - { XBF_SYNCIO, "SYNCIO" }, \ - { XBF_FUA, "FUA" }, \ - { XBF_FLUSH, "FLUSH" }, \ - { XBF_LOCK, "LOCK" }, /* should never be set */\ - { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ - { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ - { _XBF_PAGES, "PAGES" }, \ - { _XBF_KMEM, "KMEM" }, \ - { _XBF_DELWRI_Q, "DELWRI_Q" } - -typedef enum { - XBT_FORCE_SLEEP = 0, - XBT_FORCE_FLUSH = 1, -} xfs_buftarg_flags_t; - -typedef struct xfs_buftarg { - dev_t bt_dev; - struct block_device *bt_bdev; - struct backing_dev_info *bt_bdi; - struct xfs_mount *bt_mount; - unsigned int bt_bsize; - unsigned int bt_sshift; - size_t bt_smask; - - /* per device delwri queue */ - struct task_struct *bt_task; - struct list_head bt_delwrite_queue; - spinlock_t bt_delwrite_lock; - unsigned long bt_flags; - - /* LRU control structures */ - struct shrinker bt_shrinker; - struct list_head bt_lru; - spinlock_t bt_lru_lock; - unsigned int bt_lru_nr; -} xfs_buftarg_t; - -struct xfs_buf; -typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); - -#define XB_PAGES 2 - -typedef struct xfs_buf { - /* - * first cacheline holds all the fields needed for an uncontended cache - * hit to be fully processed. The semaphore straddles the cacheline - * boundary, but the counter and lock sits on the first cacheline, - * which is the only bit that is touched if we hit the semaphore - * fast-path on locking. - */ - struct rb_node b_rbnode; /* rbtree node */ - xfs_off_t b_file_offset; /* offset in file */ - size_t b_buffer_length;/* size of buffer in bytes */ - atomic_t b_hold; /* reference count */ - atomic_t b_lru_ref; /* lru reclaim ref count */ - xfs_buf_flags_t b_flags; /* status flags */ - struct semaphore b_sema; /* semaphore for lockables */ - - struct list_head b_lru; /* lru list */ - wait_queue_head_t b_waiters; /* unpin waiters */ - struct list_head b_list; - struct xfs_perag *b_pag; /* contains rbtree root */ - xfs_buftarg_t *b_target; /* buffer target (device) */ - xfs_daddr_t b_bn; /* block number for I/O */ - size_t b_count_desired;/* desired transfer size */ - void *b_addr; /* virtual address of buffer */ - struct work_struct b_iodone_work; - xfs_buf_iodone_t b_iodone; /* I/O completion function */ - struct completion b_iowait; /* queue for I/O waiters */ - void *b_fspriv; - struct xfs_trans *b_transp; - struct page **b_pages; /* array of page pointers */ - struct page *b_page_array[XB_PAGES]; /* inline pages */ - unsigned long b_queuetime; /* time buffer was queued */ - atomic_t b_pin_count; /* pin count */ - atomic_t b_io_remaining; /* #outstanding I/O requests */ - unsigned int b_page_count; /* size of page array */ - unsigned int b_offset; /* page offset in first page */ - unsigned short b_error; /* error code on I/O */ -#ifdef XFS_BUF_LOCK_TRACKING - int b_last_holder; -#endif -} xfs_buf_t; - - -/* Finding and Reading Buffers */ -extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t, xfs_buf_t *); -#define xfs_incore(buftarg,blkno,len,lockit) \ - _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) - -extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); -extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, - xfs_buf_flags_t); - -extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); -extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); -extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); -extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); -extern void xfs_buf_hold(xfs_buf_t *); -extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); -struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, - struct xfs_buftarg *target, - xfs_daddr_t daddr, size_t length, int flags); - -/* Releasing Buffers */ -extern void xfs_buf_free(xfs_buf_t *); -extern void xfs_buf_rele(xfs_buf_t *); - -/* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); -extern void xfs_buf_lock(xfs_buf_t *); -extern void xfs_buf_unlock(xfs_buf_t *); -#define xfs_buf_islocked(bp) \ - ((bp)->b_sema.count <= 0) - -/* Buffer Read and Write Routines */ -extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); -extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); - -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -extern int xfs_bdstrat_cb(struct xfs_buf *); - -extern void xfs_buf_ioend(xfs_buf_t *, int); -extern void xfs_buf_ioerror(xfs_buf_t *, int); -extern int xfs_buf_iorequest(xfs_buf_t *); -extern int xfs_buf_iowait(xfs_buf_t *); -extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, - xfs_buf_rw_t); -#define xfs_buf_zero(bp, off, len) \ - xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) - -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ - return bp ? bp->b_error : ENOMEM; -} - -/* Buffer Utility Routines */ -extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); - -/* Delayed Write Buffer Routines */ -extern void xfs_buf_delwri_dequeue(xfs_buf_t *); -extern void xfs_buf_delwri_promote(xfs_buf_t *); - -/* Buffer Daemon Setup Routines */ -extern int xfs_buf_init(void); -extern void xfs_buf_terminate(void); - -static inline const char * -xfs_buf_target_name(struct xfs_buftarg *target) -{ - static char __b[BDEVNAME_SIZE]; - - return bdevname(target->bt_bdev, __b); -} - - -#define XFS_BUF_ZEROFLAGS(bp) \ - ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ - XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) - -void xfs_buf_stale(struct xfs_buf *bp); -#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); -#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) -#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) -#define XFS_BUF_SUPER_STALE(bp) do { \ - XFS_BUF_STALE(bp); \ - xfs_buf_delwri_dequeue(bp); \ - XFS_BUF_DONE(bp); \ - } while (0) - -#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) -#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) -#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) - -#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) -#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) -#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) - -#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) -#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) -#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) - -#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) -#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) -#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) - -#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) -#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) -#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) - -#define XFS_BUF_ADDR(bp) ((bp)->b_bn) -#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) -#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) -#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) -#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) -#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) -#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) -#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) - -static inline void -xfs_buf_set_ref( - struct xfs_buf *bp, - int lru_ref) -{ - atomic_set(&bp->b_lru_ref, lru_ref); -} -#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) -#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) - -static inline int xfs_buf_ispinned(struct xfs_buf *bp) -{ - return atomic_read(&bp->b_pin_count); -} - -#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); - -static inline void xfs_buf_relse(xfs_buf_t *bp) -{ - xfs_buf_unlock(bp); - xfs_buf_rele(bp); -} - -/* - * Handling of buftargs. - */ -extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, - struct block_device *, int, const char *); -extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); -extern void xfs_wait_buftarg(xfs_buftarg_t *); -extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); -extern int xfs_flush_buftarg(xfs_buftarg_t *, int); - -#ifdef CONFIG_KDB_MODULES -extern struct list_head *xfs_get_buftarg_list(void); -#endif - -#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) -#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) - -#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) -#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) - -#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c deleted file mode 100644 index 244e797dae32..000000000000 --- a/fs/xfs/linux-2.6/xfs_discard.c +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Copyright (C) 2010 Red Hat, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_inode.h" -#include "xfs_alloc.h" -#include "xfs_error.h" -#include "xfs_discard.h" -#include "xfs_trace.h" - -STATIC int -xfs_trim_extents( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_fsblock_t start, - xfs_fsblock_t len, - xfs_fsblock_t minlen, - __uint64_t *blocks_trimmed) -{ - struct block_device *bdev = mp->m_ddev_targp->bt_bdev; - struct xfs_btree_cur *cur; - struct xfs_buf *agbp; - struct xfs_perag *pag; - int error; - int i; - - pag = xfs_perag_get(mp, agno); - - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); - if (error || !agbp) - goto out_put_perag; - - cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); - - /* - * Force out the log. This means any transactions that might have freed - * space before we took the AGF buffer lock are now on disk, and the - * volatile disk cache is flushed. - */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* - * Look up the longest btree in the AGF and start with it. - */ - error = xfs_alloc_lookup_le(cur, 0, - XFS_BUF_TO_AGF(agbp)->agf_longest, &i); - if (error) - goto out_del_cursor; - - /* - * Loop until we are done with all extents that are large - * enough to be worth discarding. - */ - while (i) { - xfs_agblock_t fbno; - xfs_extlen_t flen; - - error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); - if (error) - goto out_del_cursor; - XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); - ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); - - /* - * Too small? Give up. - */ - if (flen < minlen) { - trace_xfs_discard_toosmall(mp, agno, fbno, flen); - goto out_del_cursor; - } - - /* - * If the extent is entirely outside of the range we are - * supposed to discard skip it. Do not bother to trim - * down partially overlapping ranges for now. - */ - if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || - XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { - trace_xfs_discard_exclude(mp, agno, fbno, flen); - goto next_extent; - } - - /* - * If any blocks in the range are still busy, skip the - * discard and try again the next time. - */ - if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { - trace_xfs_discard_busy(mp, agno, fbno, flen); - goto next_extent; - } - - trace_xfs_discard_extent(mp, agno, fbno, flen); - error = -blkdev_issue_discard(bdev, - XFS_AGB_TO_DADDR(mp, agno, fbno), - XFS_FSB_TO_BB(mp, flen), - GFP_NOFS, 0); - if (error) - goto out_del_cursor; - *blocks_trimmed += flen; - -next_extent: - error = xfs_btree_decrement(cur, 0, &i); - if (error) - goto out_del_cursor; - } - -out_del_cursor: - xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); - xfs_buf_relse(agbp); -out_put_perag: - xfs_perag_put(pag); - return error; -} - -int -xfs_ioc_trim( - struct xfs_mount *mp, - struct fstrim_range __user *urange) -{ - struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; - unsigned int granularity = q->limits.discard_granularity; - struct fstrim_range range; - xfs_fsblock_t start, len, minlen; - xfs_agnumber_t start_agno, end_agno, agno; - __uint64_t blocks_trimmed = 0; - int error, last_error = 0; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (!blk_queue_discard(q)) - return -XFS_ERROR(EOPNOTSUPP); - if (copy_from_user(&range, urange, sizeof(range))) - return -XFS_ERROR(EFAULT); - - /* - * Truncating down the len isn't actually quite correct, but using - * XFS_B_TO_FSB would mean we trivially get overflows for values - * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default - * used by the fstrim application. In the end it really doesn't - * matter as trimming blocks is an advisory interface. - */ - start = XFS_B_TO_FSBT(mp, range.start); - len = XFS_B_TO_FSBT(mp, range.len); - minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); - - start_agno = XFS_FSB_TO_AGNO(mp, start); - if (start_agno >= mp->m_sb.sb_agcount) - return -XFS_ERROR(EINVAL); - - end_agno = XFS_FSB_TO_AGNO(mp, start + len); - if (end_agno >= mp->m_sb.sb_agcount) - end_agno = mp->m_sb.sb_agcount - 1; - - for (agno = start_agno; agno <= end_agno; agno++) { - error = -xfs_trim_extents(mp, agno, start, len, minlen, - &blocks_trimmed); - if (error) - last_error = error; - } - - if (last_error) - return last_error; - - range.len = XFS_FSB_TO_B(mp, blocks_trimmed); - if (copy_to_user(urange, &range, sizeof(range))) - return -XFS_ERROR(EFAULT); - return 0; -} - -int -xfs_discard_extents( - struct xfs_mount *mp, - struct list_head *list) -{ - struct xfs_busy_extent *busyp; - int error = 0; - - list_for_each_entry(busyp, list, list) { - trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, - busyp->length); - - error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, - XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), - XFS_FSB_TO_BB(mp, busyp->length), - GFP_NOFS, 0); - if (error && error != EOPNOTSUPP) { - xfs_info(mp, - "discard failed for extent [0x%llu,%u], error %d", - (unsigned long long)busyp->bno, - busyp->length, - error); - return error; - } - } - - return 0; -} diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h deleted file mode 100644 index 344879aea646..000000000000 --- a/fs/xfs/linux-2.6/xfs_discard.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef XFS_DISCARD_H -#define XFS_DISCARD_H 1 - -struct fstrim_range; -struct list_head; - -extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); -extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); - -#endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c deleted file mode 100644 index 75e5d322e48f..000000000000 --- a/fs/xfs/linux-2.6/xfs_export.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_types.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_mount.h" -#include "xfs_export.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_trace.h" - -/* - * Note that we only accept fileids which are long enough rather than allow - * the parent generation number to default to zero. XFS considers zero a - * valid generation number not an invalid/wildcard value. - */ -static int xfs_fileid_length(int fileid_type) -{ - switch (fileid_type) { - case FILEID_INO32_GEN: - return 2; - case FILEID_INO32_GEN_PARENT: - return 4; - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - return 3; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - return 6; - } - return 255; /* invalid */ -} - -STATIC int -xfs_fs_encode_fh( - struct dentry *dentry, - __u32 *fh, - int *max_len, - int connectable) -{ - struct fid *fid = (struct fid *)fh; - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; - struct inode *inode = dentry->d_inode; - int fileid_type; - int len; - - /* Directories don't need their parent encoded, they have ".." */ - if (S_ISDIR(inode->i_mode) || !connectable) - fileid_type = FILEID_INO32_GEN; - else - fileid_type = FILEID_INO32_GEN_PARENT; - - /* - * If the the filesystem may contain 64bit inode numbers, we need - * to use larger file handles that can represent them. - * - * While we only allocate inodes that do not fit into 32 bits any - * large enough filesystem may contain them, thus the slightly - * confusing looking conditional below. - */ - if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || - (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) - fileid_type |= XFS_FILEID_TYPE_64FLAG; - - /* - * Only encode if there is enough space given. In practice - * this means we can't export a filesystem with 64bit inodes - * over NFSv2 with the subtree_check export option; the other - * seven combinations work. The real answer is "don't use v2". - */ - len = xfs_fileid_length(fileid_type); - if (*max_len < len) { - *max_len = len; - return 255; - } - *max_len = len; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - spin_lock(&dentry->d_lock); - fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; - fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); - /*FALLTHRU*/ - case FILEID_INO32_GEN: - fid->i32.ino = inode->i_ino; - fid->i32.gen = inode->i_generation; - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - spin_lock(&dentry->d_lock); - fid64->parent_ino = dentry->d_parent->d_inode->i_ino; - fid64->parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); - /*FALLTHRU*/ - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - fid64->ino = inode->i_ino; - fid64->gen = inode->i_generation; - break; - } - - return fileid_type; -} - -STATIC struct inode * -xfs_nfs_get_inode( - struct super_block *sb, - u64 ino, - u32 generation) - { - xfs_mount_t *mp = XFS_M(sb); - xfs_inode_t *ip; - int error; - - /* - * NFS can sometimes send requests for ino 0. Fail them gracefully. - */ - if (ino == 0) - return ERR_PTR(-ESTALE); - - /* - * The XFS_IGET_UNTRUSTED means that an invalid inode number is just - * fine and not an indication of a corrupted filesystem as clients can - * send invalid file handles and we have to handle it gracefully.. - */ - error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); - if (error) { - /* - * EINVAL means the inode cluster doesn't exist anymore. - * This implies the filehandle is stale, so we should - * translate it here. - * We don't use ESTALE directly down the chain to not - * confuse applications using bulkstat that expect EINVAL. - */ - if (error == EINVAL || error == ENOENT) - error = ESTALE; - return ERR_PTR(-error); - } - - if (ip->i_d.di_gen != generation) { - IRELE(ip); - return ERR_PTR(-ESTALE); - } - - return VFS_I(ip); -} - -STATIC struct dentry * -xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, - int fh_len, int fileid_type) -{ - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; - struct inode *inode = NULL; - - if (fh_len < xfs_fileid_length(fileid_type)) - return NULL; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - case FILEID_INO32_GEN: - inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: - inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); - break; - } - - return d_obtain_alias(inode); -} - -STATIC struct dentry * -xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, - int fh_len, int fileid_type) -{ - struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; - struct inode *inode = NULL; - - switch (fileid_type) { - case FILEID_INO32_GEN_PARENT: - inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, - fid->i32.parent_gen); - break; - case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - inode = xfs_nfs_get_inode(sb, fid64->parent_ino, - fid64->parent_gen); - break; - } - - return d_obtain_alias(inode); -} - -STATIC struct dentry * -xfs_fs_get_parent( - struct dentry *child) -{ - int error; - struct xfs_inode *cip; - - error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); - if (unlikely(error)) - return ERR_PTR(-error); - - return d_obtain_alias(VFS_I(cip)); -} - -STATIC int -xfs_fs_nfs_commit_metadata( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - int error = 0; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_ipincount(ip)) { - error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, - XFS_LOG_SYNC, NULL); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - return error; -} - -const struct export_operations xfs_export_operations = { - .encode_fh = xfs_fs_encode_fh, - .fh_to_dentry = xfs_fs_fh_to_dentry, - .fh_to_parent = xfs_fs_fh_to_parent, - .get_parent = xfs_fs_get_parent, - .commit_metadata = xfs_fs_nfs_commit_metadata, -}; diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h deleted file mode 100644 index 3272b6ae7a35..000000000000 --- a/fs/xfs/linux-2.6/xfs_export.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_EXPORT_H__ -#define __XFS_EXPORT_H__ - -/* - * Common defines for code related to exporting XFS filesystems over NFS. - * - * The NFS fileid goes out on the wire as an array of - * 32bit unsigned ints in host order. There are 5 possible - * formats. - * - * (1) fileid_type=0x00 - * (no fileid data; handled by the generic code) - * - * (2) fileid_type=0x01 - * inode-num - * generation - * - * (3) fileid_type=0x02 - * inode-num - * generation - * parent-inode-num - * parent-generation - * - * (4) fileid_type=0x81 - * inode-num-lo32 - * inode-num-hi32 - * generation - * - * (5) fileid_type=0x82 - * inode-num-lo32 - * inode-num-hi32 - * generation - * parent-inode-num-lo32 - * parent-inode-num-hi32 - * parent-generation - * - * Note, the NFS filehandle also includes an fsid portion which - * may have an inode number in it. That number is hardcoded to - * 32bits and there is no way for XFS to intercept it. In - * practice this means when exporting an XFS filesystem with 64bit - * inodes you should either export the mountpoint (rather than - * a subdirectory) or use the "fsid" export option. - */ - -struct xfs_fid64 { - u64 ino; - u32 gen; - u64 parent_ino; - u32 parent_gen; -} __attribute__((packed)); - -/* This flag goes on the wire. Don't play with it. */ -#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ - -#endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c deleted file mode 100644 index 7f7b42469ea7..000000000000 --- a/fs/xfs/linux-2.6/xfs_file.c +++ /dev/null @@ -1,1096 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_trans.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_bmap.h" -#include "xfs_error.h" -#include "xfs_vnodeops.h" -#include "xfs_da_btree.h" -#include "xfs_ioctl.h" -#include "xfs_trace.h" - -#include -#include - -static const struct vm_operations_struct xfs_file_vm_ops; - -/* - * Locking primitives for read and write IO paths to ensure we consistently use - * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. - */ -static inline void -xfs_rw_ilock( - struct xfs_inode *ip, - int type) -{ - if (type & XFS_IOLOCK_EXCL) - mutex_lock(&VFS_I(ip)->i_mutex); - xfs_ilock(ip, type); -} - -static inline void -xfs_rw_iunlock( - struct xfs_inode *ip, - int type) -{ - xfs_iunlock(ip, type); - if (type & XFS_IOLOCK_EXCL) - mutex_unlock(&VFS_I(ip)->i_mutex); -} - -static inline void -xfs_rw_ilock_demote( - struct xfs_inode *ip, - int type) -{ - xfs_ilock_demote(ip, type); - if (type & XFS_IOLOCK_EXCL) - mutex_unlock(&VFS_I(ip)->i_mutex); -} - -/* - * xfs_iozero - * - * xfs_iozero clears the specified range of buffer supplied, - * and marks all the affected blocks as valid and modified. If - * an affected block is not allocated, it will be allocated. If - * an affected block is not completely overwritten, and is not - * valid before the operation, it will be read from disk before - * being partially zeroed. - */ -STATIC int -xfs_iozero( - struct xfs_inode *ip, /* inode */ - loff_t pos, /* offset in file */ - size_t count) /* size of data to zero */ -{ - struct page *page; - struct address_space *mapping; - int status; - - mapping = VFS_I(ip)->i_mapping; - do { - unsigned offset, bytes; - void *fsdata; - - offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ - bytes = PAGE_CACHE_SIZE - offset; - if (bytes > count) - bytes = count; - - status = pagecache_write_begin(NULL, mapping, pos, bytes, - AOP_FLAG_UNINTERRUPTIBLE, - &page, &fsdata); - if (status) - break; - - zero_user(page, offset, bytes); - - status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, - page, fsdata); - WARN_ON(status <= 0); /* can't return less than zero! */ - pos += bytes; - count -= bytes; - status = 0; - } while (count); - - return (-status); -} - -STATIC int -xfs_file_fsync( - struct file *file, - loff_t start, - loff_t end, - int datasync) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error = 0; - int log_flushed = 0; - - trace_xfs_file_fsync(ip); - - error = filemap_write_and_wait_range(inode->i_mapping, start, end); - if (error) - return error; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - - xfs_ilock(ip, XFS_IOLOCK_SHARED); - xfs_ioend_wait(ip); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - if (mp->m_flags & XFS_MOUNT_BARRIER) { - /* - * If we have an RT and/or log subvolume we need to make sure - * to flush the write cache the device used for file data - * first. This is to ensure newly written file data make - * it to disk before logging the new inode size in case of - * an extending write. - */ - if (XFS_IS_REALTIME_INODE(ip)) - xfs_blkdev_issue_flush(mp->m_rtdev_targp); - else if (mp->m_logdev_targp != mp->m_ddev_targp) - xfs_blkdev_issue_flush(mp->m_ddev_targp); - } - - /* - * We always need to make sure that the required inode state is safe on - * disk. The inode might be clean but we still might need to force the - * log because of committed transactions that haven't hit the disk yet. - * Likewise, there could be unflushed non-transactional changes to the - * inode core that have to go to disk and this requires us to issue - * a synchronous transaction to capture these changes correctly. - * - * This code relies on the assumption that if the i_update_core field - * of the inode is clear and the inode is unpinned then it is clean - * and no action is required. - */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - - /* - * First check if the VFS inode is marked dirty. All the dirtying - * of non-transactional updates no goes through mark_inode_dirty*, - * which allows us to distinguish beteeen pure timestamp updates - * and i_size updates which need to be caught for fdatasync. - * After that also theck for the dirty state in the XFS inode, which - * might gets cleared when the inode gets written out via the AIL - * or xfs_iflush_cluster. - */ - if (((inode->i_state & I_DIRTY_DATASYNC) || - ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && - ip->i_update_core) { - /* - * Kick off a transaction to log the inode core to get the - * updates. The sync transaction will also force the log. - */ - xfs_iunlock(ip, XFS_ILOCK_SHARED); - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, - XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - if (error) { - xfs_trans_cancel(tp, 0); - return -error; - } - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Note - it's possible that we might have pushed ourselves out - * of the way during trans_reserve which would flush the inode. - * But there's no guarantee that the inode buffer has actually - * gone out yet (it's delwri). Plus the buffer could be pinned - * anyway if it's part of an inode in another recent - * transaction. So we play it safe and fire off the - * transaction anyway. - */ - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - xfs_trans_set_sync(tp); - error = _xfs_trans_commit(tp, 0, &log_flushed); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - } else { - /* - * Timestamps/size haven't changed since last inode flush or - * inode transaction commit. That means either nothing got - * written or a transaction committed which caught the updates. - * If the latter happened and the transaction hasn't hit the - * disk yet, the inode will be still be pinned. If it is, - * force the log. - */ - if (xfs_ipincount(ip)) { - error = _xfs_log_force_lsn(mp, - ip->i_itemp->ili_last_lsn, - XFS_LOG_SYNC, &log_flushed); - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - } - - /* - * If we only have a single device, and the log force about was - * a no-op we might have to flush the data device cache here. - * This can only happen for fdatasync/O_DSYNC if we were overwriting - * an already allocated file and thus do not have any metadata to - * commit. - */ - if ((mp->m_flags & XFS_MOUNT_BARRIER) && - mp->m_logdev_targp == mp->m_ddev_targp && - !XFS_IS_REALTIME_INODE(ip) && - !log_flushed) - xfs_blkdev_issue_flush(mp->m_ddev_targp); - - return -error; -} - -STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - size_t size = 0; - ssize_t ret = 0; - int ioflags = 0; - xfs_fsize_t n; - unsigned long seg; - - XFS_STATS_INC(xs_read_calls); - - BUG_ON(iocb->ki_pos != pos); - - if (unlikely(file->f_flags & O_DIRECT)) - ioflags |= IO_ISDIRECT; - if (file->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - /* START copy & waste from filemap.c */ - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iovp[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - size += iv->iov_len; - if (unlikely((ssize_t)(size|iv->iov_len) < 0)) - return XFS_ERROR(-EINVAL); - } - /* END copy & waste from filemap.c */ - - if (unlikely(ioflags & IO_ISDIRECT)) { - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - if ((iocb->ki_pos & target->bt_smask) || - (size & target->bt_smask)) { - if (iocb->ki_pos == ip->i_size) - return 0; - return -XFS_ERROR(EINVAL); - } - } - - n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; - if (n <= 0 || size == 0) - return 0; - - if (n < size) - size = n; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - if (unlikely(ioflags & IO_ISDIRECT)) { - xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); - - if (inode->i_mapping->nrpages) { - ret = -xfs_flushinval_pages(ip, - (iocb->ki_pos & PAGE_CACHE_MASK), - -1, FI_REMAPF_LOCKED); - if (ret) { - xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; - } - } - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - } else - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - - trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); - - ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -STATIC ssize_t -xfs_file_splice_read( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t count, - unsigned int flags) -{ - struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_read_calls); - - if (infilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); - - trace_xfs_file_splice_read(ip, count, *ppos, ioflags); - - ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_read_bytes, ret); - - xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); - return ret; -} - -STATIC void -xfs_aio_write_isize_update( - struct inode *inode, - loff_t *ppos, - ssize_t bytes_written) -{ - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t isize = i_size_read(inode); - - if (bytes_written > 0) - XFS_STATS_ADD(xs_write_bytes, bytes_written); - - if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && - *ppos > isize)) - *ppos = isize; - - if (*ppos > ip->i_size) { - xfs_rw_ilock(ip, XFS_ILOCK_EXCL); - if (*ppos > ip->i_size) - ip->i_size = *ppos; - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -/* - * If this was a direct or synchronous I/O that failed (such as ENOSPC) then - * part of the I/O may have been written to disk before the error occurred. In - * this case the on-disk file size may have been adjusted beyond the in-memory - * file size and now needs to be truncated back. - */ -STATIC void -xfs_aio_write_newsize_update( - struct xfs_inode *ip) -{ - if (ip->i_new_size) { - xfs_rw_ilock(ip, XFS_ILOCK_EXCL); - ip->i_new_size = 0; - if (ip->i_d.di_size > ip->i_size) - ip->i_d.di_size = ip->i_size; - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - } -} - -/* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t new_size; - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - new_size = *ppos + count; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (new_size > ip->i_size) - ip->i_new_size = new_size; - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - - xfs_aio_write_isize_update(inode, ppos, ret); - xfs_aio_write_newsize_update(ip); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* - * This routine is called to handle zeroing any space in the last - * block of the file that is beyond the EOF. We do this since the - * size is being increased without writing anything to that block - * and we don't want anyone to read the garbage on the disk. - */ -STATIC int /* error (positive) */ -xfs_zero_last_block( - xfs_inode_t *ip, - xfs_fsize_t offset, - xfs_fsize_t isize) -{ - xfs_fileoff_t last_fsb; - xfs_mount_t *mp = ip->i_mount; - int nimaps; - int zero_offset; - int zero_len; - int error = 0; - xfs_bmbt_irec_t imap; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - zero_offset = XFS_B_FSB_OFFSET(mp, isize); - if (zero_offset == 0) { - /* - * There are no extra bytes in the last block on disk to - * zero, so return. - */ - return 0; - } - - last_fsb = XFS_B_TO_FSBT(mp, isize); - nimaps = 1; - error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, - &nimaps, NULL); - if (error) { - return error; - } - ASSERT(nimaps > 0); - /* - * If the block underlying isize is just a hole, then there - * is nothing to zero. - */ - if (imap.br_startblock == HOLESTARTBLOCK) { - return 0; - } - /* - * Zero the part of the last block beyond the EOF, and write it - * out sync. We need to drop the ilock while we do this so we - * don't deadlock when the buffer cache calls back to us. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - zero_len = mp->m_sb.sb_blocksize - zero_offset; - if (isize + zero_len > offset) - zero_len = offset - isize; - error = xfs_iozero(ip, isize, zero_len); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - ASSERT(error >= 0); - return error; -} - -/* - * Zero any on disk space between the current EOF and the new, - * larger EOF. This handles the normal case of zeroing the remainder - * of the last block in the file and the unusual case of zeroing blocks - * out beyond the size of the file. This second case only happens - * with fixed size extents and when the system crashes before the inode - * size was updated but after blocks were allocated. If fill is set, - * then any holes in the range are filled and zeroed. If not, the holes - * are left alone as holes. - */ - -int /* error (positive) */ -xfs_zero_eof( - xfs_inode_t *ip, - xfs_off_t offset, /* starting I/O offset */ - xfs_fsize_t isize) /* current inode size */ -{ - xfs_mount_t *mp = ip->i_mount; - xfs_fileoff_t start_zero_fsb; - xfs_fileoff_t end_zero_fsb; - xfs_fileoff_t zero_count_fsb; - xfs_fileoff_t last_fsb; - xfs_fileoff_t zero_off; - xfs_fsize_t zero_len; - int nimaps; - int error = 0; - xfs_bmbt_irec_t imap; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(offset > isize); - - /* - * First handle zeroing the block on which isize resides. - * We only zero a part of that block so it is handled specially. - */ - error = xfs_zero_last_block(ip, offset, isize); - if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - return error; - } - - /* - * Calculate the range between the new size and the old - * where blocks needing to be zeroed may exist. To get the - * block where the last byte in the file currently resides, - * we need to subtract one from the size and truncate back - * to a block boundary. We subtract 1 in case the size is - * exactly on a block boundary. - */ - last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; - start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); - end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); - ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); - if (last_fsb == end_zero_fsb) { - /* - * The size was only incremented on its last block. - * We took care of that above, so just return. - */ - return 0; - } - - ASSERT(start_zero_fsb <= end_zero_fsb); - while (start_zero_fsb <= end_zero_fsb) { - nimaps = 1; - zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; - error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, - 0, NULL, 0, &imap, &nimaps, NULL); - if (error) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - return error; - } - ASSERT(nimaps > 0); - - if (imap.br_state == XFS_EXT_UNWRITTEN || - imap.br_startblock == HOLESTARTBLOCK) { - /* - * This loop handles initializing pages that were - * partially initialized by the code below this - * loop. It basically zeroes the part of the page - * that sits on a hole and sets the page as P_HOLE - * and calls remapf if it is a mapped file. - */ - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - continue; - } - - /* - * There are blocks we need to zero. - * Drop the inode lock while we're doing the I/O. - * We'll still have the iolock to protect us. - */ - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); - zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); - - if ((zero_off + zero_len) > offset) - zero_len = offset - zero_off; - - error = xfs_iozero(ip, zero_off, zero_len); - if (error) { - goto out_lock; - } - - start_zero_fsb = imap.br_startoff + imap.br_blockcount; - ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); - - xfs_ilock(ip, XFS_ILOCK_EXCL); - } - - return 0; - -out_lock: - xfs_ilock(ip, XFS_ILOCK_EXCL); - ASSERT(error >= 0); - return error; -} - -/* - * Common pre-write limit and setup checks. - * - * Returns with iolock held according to @iolock. - */ -STATIC ssize_t -xfs_file_aio_write_checks( - struct file *file, - loff_t *pos, - size_t *count, - int *iolock) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - xfs_fsize_t new_size; - int error = 0; - - error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); - if (error) { - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); - *iolock = 0; - return error; - } - - new_size = *pos + *count; - if (new_size > ip->i_size) - ip->i_new_size = new_size; - - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); - - /* - * If the offset is beyond the size of the file, we need to zero any - * blocks that fall between the existing EOF and the start of this - * write. - */ - if (*pos > ip->i_size) - error = -xfs_zero_eof(ip, *pos, ip->i_size); - - xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); - if (error) - return error; - - /* - * If we're writing the file then make sure to clear the setuid and - * setgid bits if the process is not being run by root. This keeps - * people from modifying setuid and setgid binaries. - */ - return file_remove_suid(file); - -} - -/* - * xfs_file_dio_aio_write - handle direct IO writes - * - * Lock the inode appropriately to prepare for and issue a direct IO write. - * By separating it from the buffered write path we remove all the tricky to - * follow locking changes and looping. - * - * If there are cached pages or we're extending the file, we need IOLOCK_EXCL - * until we're sure the bytes at the new EOF have been zeroed and/or the cached - * pages are flushed out. - * - * In most cases the direct IO writes will be done holding IOLOCK_SHARED - * allowing them to be done in parallel with reads and other direct IO writes. - * However, if the IO is not aligned to filesystem blocks, the direct IO layer - * needs to do sub-block zeroing and that requires serialisation against other - * direct IOs to the same block. In this case we need to serialise the - * submission of the unaligned IOs so that we don't get racing block zeroing in - * the dio layer. To avoid the problem with aio, we also need to wait for - * outstanding IOs to complete so that unwritten extent conversion is completed - * before we try to map the overlapping block. This is currently implemented by - * hitting it with a big hammer (i.e. xfs_ioend_wait()). - * - * Returns with locks held indicated by @iolock and errors indicated by - * negative return values. - */ -STATIC ssize_t -xfs_file_dio_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount, - int *iolock) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - ssize_t ret = 0; - size_t count = ocount; - int unaligned_io = 0; - struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - *iolock = 0; - if ((pos & target->bt_smask) || (count & target->bt_smask)) - return -XFS_ERROR(EINVAL); - - if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) - unaligned_io = 1; - - if (unaligned_io || mapping->nrpages || pos > ip->i_size) - *iolock = XFS_IOLOCK_EXCL; - else - *iolock = XFS_IOLOCK_SHARED; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - - ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); - if (ret) - return ret; - - if (mapping->nrpages) { - WARN_ON(*iolock != XFS_IOLOCK_EXCL); - ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, - FI_REMAPF_LOCKED); - if (ret) - return ret; - } - - /* - * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to flush cached pages - */ - if (unaligned_io) - xfs_ioend_wait(ip); - else if (*iolock == XFS_IOLOCK_EXCL) { - xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); - *iolock = XFS_IOLOCK_SHARED; - } - - trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); - - /* No fallback to buffered IO on errors for XFS. */ - ASSERT(ret < 0 || ret == count); - return ret; -} - -STATIC ssize_t -xfs_file_buffered_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount, - int *iolock) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - ssize_t ret; - int enospc = 0; - size_t count = ocount; - - *iolock = XFS_IOLOCK_EXCL; - xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); - - ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); - if (ret) - return ret; - - /* We can write back this queue in page reclaim */ - current->backing_dev_info = mapping->backing_dev_info; - -write_retry: - trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, ret); - /* - * if we just got an ENOSPC, flush the inode now we aren't holding any - * page locks and retry *once* - */ - if (ret == -ENOSPC && !enospc) { - ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); - if (ret) - return ret; - enospc = 1; - goto write_retry; - } - current->backing_dev_info = NULL; - return ret; -} - -STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) -{ - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; - struct xfs_inode *ip = XFS_I(inode); - ssize_t ret; - int iolock; - size_t ocount = 0; - - XFS_STATS_INC(xs_write_calls); - - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - - if (ocount == 0) - return 0; - - xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, - ocount, &iolock); - else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount, &iolock); - - xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); - - if (ret <= 0) - goto out_unlock; - - /* Handle various SYNC-type writes */ - if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { - loff_t end = pos + ret - 1; - int error; - - xfs_rw_iunlock(ip, iolock); - error = xfs_file_fsync(file, pos, end, - (file->f_flags & __O_SYNC) ? 0 : 1); - xfs_rw_ilock(ip, iolock); - if (error) - ret = error; - } - -out_unlock: - xfs_aio_write_newsize_update(ip); - xfs_rw_iunlock(ip, iolock); - return ret; -} - -STATIC long -xfs_file_fallocate( - struct file *file, - int mode, - loff_t offset, - loff_t len) -{ - struct inode *inode = file->f_path.dentry->d_inode; - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - int attr_flags = XFS_ATTR_NOLOCK; - - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) - return -EOPNOTSUPP; - - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; - - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); - if (error) - goto out_unlock; - } - - if (file->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; - - error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); - if (error) - goto out_unlock; - - /* Change file size if needed */ - if (new_size) { - struct iattr iattr; - - iattr.ia_valid = ATTR_SIZE; - iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); - } - -out_unlock: - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; -} - - -STATIC int -xfs_file_open( - struct inode *inode, - struct file *file) -{ - if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) - return -EFBIG; - if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) - return -EIO; - return 0; -} - -STATIC int -xfs_dir_open( - struct inode *inode, - struct file *file) -{ - struct xfs_inode *ip = XFS_I(inode); - int mode; - int error; - - error = xfs_file_open(inode, file); - if (error) - return error; - - /* - * If there are any blocks, read-ahead block 0 as we're almost - * certain to have the next operation be a read there. - */ - mode = xfs_ilock_map_shared(ip); - if (ip->i_d.di_nextents > 0) - xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); - xfs_iunlock(ip, mode); - return 0; -} - -STATIC int -xfs_file_release( - struct inode *inode, - struct file *filp) -{ - return -xfs_release(XFS_I(inode)); -} - -STATIC int -xfs_file_readdir( - struct file *filp, - void *dirent, - filldir_t filldir) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_inode_t *ip = XFS_I(inode); - int error; - size_t bufsize; - - /* - * The Linux API doesn't pass down the total size of the buffer - * we read into down to the filesystem. With the filldir concept - * it's not needed for correct information, but the XFS dir2 leaf - * code wants an estimate of the buffer size to calculate it's - * readahead window and size the buffers used for mapping to - * physical blocks. - * - * Try to give it an estimate that's good enough, maybe at some - * point we can change the ->readdir prototype to include the - * buffer size. For now we use the current glibc buffer size. - */ - bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); - - error = xfs_readdir(ip, dirent, bufsize, - (xfs_off_t *)&filp->f_pos, filldir); - if (error) - return -error; - return 0; -} - -STATIC int -xfs_file_mmap( - struct file *filp, - struct vm_area_struct *vma) -{ - vma->vm_ops = &xfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - - file_accessed(filp); - return 0; -} - -/* - * mmap()d file has taken write protection fault and is being made - * writable. We can set the page state up correctly for a writable - * page, which means we can do correct delalloc accounting (ENOSPC - * checking!) and unwritten extent mapping. - */ -STATIC int -xfs_vm_page_mkwrite( - struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - return block_page_mkwrite(vma, vmf, xfs_get_blocks); -} - -const struct file_operations xfs_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, - .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, - .unlocked_ioctl = xfs_file_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, - .fallocate = xfs_file_fallocate, -}; - -const struct file_operations xfs_dir_file_operations = { - .open = xfs_dir_open, - .read = generic_read_dir, - .readdir = xfs_file_readdir, - .llseek = generic_file_llseek, - .unlocked_ioctl = xfs_file_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_ioctl, -#endif - .fsync = xfs_file_fsync, -}; - -static const struct vm_operations_struct xfs_file_vm_ops = { - .fault = filemap_fault, - .page_mkwrite = xfs_vm_page_mkwrite, -}; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c deleted file mode 100644 index ed88ed16811c..000000000000 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_trace.h" - -/* - * note: all filemap functions return negative error codes. These - * need to be inverted before returning to the xfs core functions. - */ -void -xfs_tosspages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - int fiopt) -{ - /* can't toss partial tail pages, so mask them out */ - last &= ~(PAGE_SIZE - 1); - truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); -} - -int -xfs_flushinval_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - int fiopt) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - int ret = 0; - - trace_xfs_pagecache_inval(ip, first, last); - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = filemap_write_and_wait_range(mapping, first, - last == -1 ? LLONG_MAX : last); - if (!ret) - truncate_inode_pages_range(mapping, first, last); - return -ret; -} - -int -xfs_flush_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last, - uint64_t flags, - int fiopt) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - int ret = 0; - int ret2; - - xfs_iflags_clear(ip, XFS_ITRUNCATED); - ret = -filemap_fdatawrite_range(mapping, first, - last == -1 ? LLONG_MAX : last); - if (flags & XBF_ASYNC) - return ret; - ret2 = xfs_wait_on_pages(ip, first, last); - if (!ret) - ret = ret2; - return ret; -} - -int -xfs_wait_on_pages( - xfs_inode_t *ip, - xfs_off_t first, - xfs_off_t last) -{ - struct address_space *mapping = VFS_I(ip)->i_mapping; - - if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { - return -filemap_fdatawait_range(mapping, first, - last == -1 ? ip->i_size - 1 : last); - } - return 0; -} diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c deleted file mode 100644 index 76e81cff70b9..000000000000 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sysctl.h" - -/* - * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, - * other XFS code uses these values. Times are measured in centisecs (i.e. - * 100ths of a second). - */ -xfs_param_t xfs_params = { - /* MIN DFLT MAX */ - .sgid_inherit = { 0, 0, 1 }, - .symlink_mode = { 0, 0, 1 }, - .panic_mask = { 0, 0, 255 }, - .error_level = { 0, 3, 11 }, - .syncd_timer = { 1*100, 30*100, 7200*100}, - .stats_clear = { 0, 0, 1 }, - .inherit_sync = { 0, 1, 1 }, - .inherit_nodump = { 0, 1, 1 }, - .inherit_noatim = { 0, 1, 1 }, - .xfs_buf_timer = { 100/2, 1*100, 30*100 }, - .xfs_buf_age = { 1*100, 15*100, 7200*100}, - .inherit_nosym = { 0, 0, 1 }, - .rotorstep = { 1, 1, 255 }, - .inherit_nodfrg = { 0, 1, 1 }, - .fstrm_timer = { 1, 30*100, 3600*100}, -}; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c deleted file mode 100644 index f7ce7debe14c..000000000000 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ /dev/null @@ -1,1556 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_ioctl.h" -#include "xfs_rtalloc.h" -#include "xfs_itable.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_bmap.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_dfrag.h" -#include "xfs_fsops.h" -#include "xfs_vnodeops.h" -#include "xfs_discard.h" -#include "xfs_quota.h" -#include "xfs_inode_item.h" -#include "xfs_export.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include - -/* - * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to - * a file or fs handle. - * - * XFS_IOC_PATH_TO_FSHANDLE - * returns fs handle for a mount point or path within that mount point - * XFS_IOC_FD_TO_HANDLE - * returns full handle for a FD opened in user space - * XFS_IOC_PATH_TO_HANDLE - * returns full handle for a path - */ -int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq) -{ - int hsize; - xfs_handle_t handle; - struct inode *inode; - struct file *file = NULL; - struct path path; - int error; - struct xfs_inode *ip; - - if (cmd == XFS_IOC_FD_TO_HANDLE) { - file = fget(hreq->fd); - if (!file) - return -EBADF; - inode = file->f_path.dentry->d_inode; - } else { - error = user_lpath((const char __user *)hreq->path, &path); - if (error) - return error; - inode = path.dentry->d_inode; - } - ip = XFS_I(inode); - - /* - * We can only generate handles for inodes residing on a XFS filesystem, - * and only for regular files, directories or symbolic links. - */ - error = -EINVAL; - if (inode->i_sb->s_magic != XFS_SB_MAGIC) - goto out_put; - - error = -EBADF; - if (!S_ISREG(inode->i_mode) && - !S_ISDIR(inode->i_mode) && - !S_ISLNK(inode->i_mode)) - goto out_put; - - - memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); - - if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { - /* - * This handle only contains an fsid, zero the rest. - */ - memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); - hsize = sizeof(xfs_fsid_t); - } else { - int lock_mode; - - lock_mode = xfs_ilock_map_shared(ip); - handle.ha_fid.fid_len = sizeof(xfs_fid_t) - - sizeof(handle.ha_fid.fid_len); - handle.ha_fid.fid_pad = 0; - handle.ha_fid.fid_gen = ip->i_d.di_gen; - handle.ha_fid.fid_ino = ip->i_ino; - xfs_iunlock_map_shared(ip, lock_mode); - - hsize = XFS_HSIZE(handle); - } - - error = -EFAULT; - if (copy_to_user(hreq->ohandle, &handle, hsize) || - copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) - goto out_put; - - error = 0; - - out_put: - if (cmd == XFS_IOC_FD_TO_HANDLE) - fput(file); - else - path_put(&path); - return error; -} - -/* - * No need to do permission checks on the various pathname components - * as the handle operations are privileged. - */ -STATIC int -xfs_handle_acceptable( - void *context, - struct dentry *dentry) -{ - return 1; -} - -/* - * Convert userspace handle data into a dentry. - */ -struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen) -{ - xfs_handle_t handle; - struct xfs_fid64 fid; - - /* - * Only allow handle opens under a directory. - */ - if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) - return ERR_PTR(-ENOTDIR); - - if (hlen != sizeof(xfs_handle_t)) - return ERR_PTR(-EINVAL); - if (copy_from_user(&handle, uhandle, hlen)) - return ERR_PTR(-EFAULT); - if (handle.ha_fid.fid_len != - sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) - return ERR_PTR(-EINVAL); - - memset(&fid, 0, sizeof(struct fid)); - fid.ino = handle.ha_fid.fid_ino; - fid.gen = handle.ha_fid.fid_gen; - - return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, - FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, - xfs_handle_acceptable, NULL); -} - -STATIC struct dentry * -xfs_handlereq_to_dentry( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); -} - -int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - const struct cred *cred = current_cred(); - int error; - int fd; - int permflag; - struct file *filp; - struct inode *inode; - struct dentry *dentry; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - inode = dentry->d_inode; - - /* Restrict xfs_open_by_handle to directories & regular files. */ - if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { - error = -XFS_ERROR(EPERM); - goto out_dput; - } - -#if BITS_PER_LONG != 32 - hreq->oflags |= O_LARGEFILE; -#endif - - /* Put open permission in namei format. */ - permflag = hreq->oflags; - if ((permflag+1) & O_ACCMODE) - permflag++; - if (permflag & O_TRUNC) - permflag |= 2; - - if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && - (permflag & FMODE_WRITE) && IS_APPEND(inode)) { - error = -XFS_ERROR(EPERM); - goto out_dput; - } - - if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { - error = -XFS_ERROR(EACCES); - goto out_dput; - } - - /* Can't write directories. */ - if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { - error = -XFS_ERROR(EISDIR); - goto out_dput; - } - - fd = get_unused_fd(); - if (fd < 0) { - error = fd; - goto out_dput; - } - - filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), - hreq->oflags, cred); - if (IS_ERR(filp)) { - put_unused_fd(fd); - return PTR_ERR(filp); - } - - if (S_ISREG(inode->i_mode)) { - filp->f_flags |= O_NOATIME; - filp->f_mode |= FMODE_NOCMTIME; - } - - fd_install(fd, filp); - return fd; - - out_dput: - dput(dentry); - return error; -} - -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( - char __user *buffer, - int buflen, - const char *link) -{ - int len; - - len = PTR_ERR(link); - if (IS_ERR(link)) - goto out; - - len = strlen(link); - if (len > (unsigned) buflen) - len = buflen; - if (copy_to_user(buffer, link, len)) - len = -EFAULT; - out: - return len; -} - - -int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq) -{ - struct dentry *dentry; - __u32 olen; - void *link; - int error; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - dentry = xfs_handlereq_to_dentry(parfilp, hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - /* Restrict this handle operation to symlinks only. */ - if (!S_ISLNK(dentry->d_inode->i_mode)) { - error = -XFS_ERROR(EINVAL); - goto out_dput; - } - - if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { - error = -XFS_ERROR(EFAULT); - goto out_dput; - } - - link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); - if (!link) { - error = -XFS_ERROR(ENOMEM); - goto out_dput; - } - - error = -xfs_readlink(XFS_I(dentry->d_inode), link); - if (error) - goto out_kfree; - error = do_readlink(hreq->ohandle, olen, link); - if (error) - goto out_kfree; - - out_kfree: - kfree(link); - out_dput: - dput(dentry); - return error; -} - -STATIC int -xfs_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); - - dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); - goto out; - } - - if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); - goto out; - } - - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - - out: - dput(dentry); - return error; -} - -STATIC int -xfs_attrlist_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error = -ENOMEM; - attrlist_cursor_kern_t *cursor; - xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); - - dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) - error = -EFAULT; - - out_kfree: - kfree(kbuf); - out_dput: - dput(dentry); - return error; -} - -int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags) -{ - unsigned char *kbuf; - int error = EFAULT; - - if (*len > XATTR_SIZE_MAX) - return EINVAL; - kbuf = kmalloc(*len, GFP_KERNEL); - if (!kbuf) - return ENOMEM; - - error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); - if (error) - goto out_kfree; - - if (copy_to_user(ubuf, kbuf, *len)) - error = EFAULT; - - out_kfree: - kfree(kbuf); - return error; -} - -int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags) -{ - unsigned char *kbuf; - int error = EFAULT; - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; - if (len > XATTR_SIZE_MAX) - return EINVAL; - - kbuf = memdup_user(ubuf, len); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - - error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); - - return error; -} - -int -xfs_attrmulti_attr_remove( - struct inode *inode, - unsigned char *name, - __uint32_t flags) -{ - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return EPERM; - return xfs_attr_remove(XFS_I(inode), name, flags); -} - -STATIC int -xfs_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - xfs_attr_multiop_t *ops; - xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - unsigned char *attr_name; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = E2BIG; - size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(am_hreq.ops, size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user((char *)attr_name, - ops[i].am_attrname, MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - dentry->d_inode, attr_name, - ops[i].am_attrvalue, &ops[i].am_length, - ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - dentry->d_inode, attr_name, - ops[i].am_attrvalue, ops[i].am_length, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - dentry->d_inode, attr_name, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - default: - ops[i].am_error = EINVAL; - } - } - - if (copy_to_user(am_hreq.ops, ops, size)) - error = XFS_ERROR(EFAULT); - - kfree(attr_name); - out_kfree_ops: - kfree(ops); - out_dput: - dput(dentry); - return -error; -} - -int -xfs_ioc_space( - struct xfs_inode *ip, - struct inode *inode, - struct file *filp, - int ioflags, - unsigned int cmd, - xfs_flock64_t *bf) -{ - int attr_flags = 0; - int error; - - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. - */ - if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) - return -XFS_ERROR(EPERM); - - if (!(filp->f_mode & FMODE_WRITE)) - return -XFS_ERROR(EBADF); - - if (!S_ISREG(inode->i_mode)) - return -XFS_ERROR(EINVAL); - - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= XFS_ATTR_NONBLOCK; - - if (filp->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; - - if (ioflags & IO_INVIS) - attr_flags |= XFS_ATTR_DMI; - - error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); - return -error; -} - -STATIC int -xfs_ioc_bulkstat( - xfs_mount_t *mp, - unsigned int cmd, - void __user *arg) -{ - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; - int error; - - /* done = 1 if there are more stats to get and if bulkstat */ - /* should be called again (unused here, but used in dmapi) */ - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) - return -XFS_ERROR(EFAULT); - - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); - - if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); - - if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); - - if (cmd == XFS_IOC_FSINUMBERS) - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt); - else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) - error = xfs_bulkstat_single(mp, &inlast, - bulkreq.ubuffer, &done); - else /* XFS_IOC_FSBULKSTAT */ - error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, - sizeof(xfs_bstat_t), bulkreq.ubuffer, - &done); - - if (error) - return -error; - - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); - - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); - } - - return 0; -} - -STATIC int -xfs_ioc_fsgeometry_v1( - xfs_mount_t *mp, - void __user *arg) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 3); - if (error) - return -error; - - /* - * Caller should have passed an argument of type - * xfs_fsop_geom_v1_t. This is a proper subset of the - * xfs_fsop_geom_t that xfs_fs_geometry() fills in. - */ - if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_ioc_fsgeometry( - xfs_mount_t *mp, - void __user *arg) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 4); - if (error) - return -error; - - if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* - * Linux extended inode flags interface. - */ - -STATIC unsigned int -xfs_merge_ioc_xflags( - unsigned int flags, - unsigned int start) -{ - unsigned int xflags = start; - - if (flags & FS_IMMUTABLE_FL) - xflags |= XFS_XFLAG_IMMUTABLE; - else - xflags &= ~XFS_XFLAG_IMMUTABLE; - if (flags & FS_APPEND_FL) - xflags |= XFS_XFLAG_APPEND; - else - xflags &= ~XFS_XFLAG_APPEND; - if (flags & FS_SYNC_FL) - xflags |= XFS_XFLAG_SYNC; - else - xflags &= ~XFS_XFLAG_SYNC; - if (flags & FS_NOATIME_FL) - xflags |= XFS_XFLAG_NOATIME; - else - xflags &= ~XFS_XFLAG_NOATIME; - if (flags & FS_NODUMP_FL) - xflags |= XFS_XFLAG_NODUMP; - else - xflags &= ~XFS_XFLAG_NODUMP; - - return xflags; -} - -STATIC unsigned int -xfs_di2lxflags( - __uint16_t di_flags) -{ - unsigned int flags = 0; - - if (di_flags & XFS_DIFLAG_IMMUTABLE) - flags |= FS_IMMUTABLE_FL; - if (di_flags & XFS_DIFLAG_APPEND) - flags |= FS_APPEND_FL; - if (di_flags & XFS_DIFLAG_SYNC) - flags |= FS_SYNC_FL; - if (di_flags & XFS_DIFLAG_NOATIME) - flags |= FS_NOATIME_FL; - if (di_flags & XFS_DIFLAG_NODUMP) - flags |= FS_NODUMP_FL; - return flags; -} - -STATIC int -xfs_ioc_fsgetxattr( - xfs_inode_t *ip, - int attr, - void __user *arg) -{ - struct fsxattr fa; - - memset(&fa, 0, sizeof(struct fsxattr)); - - xfs_ilock(ip, XFS_ILOCK_SHARED); - fa.fsx_xflags = xfs_ip2xflags(ip); - fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; - fa.fsx_projid = xfs_get_projid(ip); - - if (attr) { - if (ip->i_afp) { - if (ip->i_afp->if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_afp->if_bytes / - sizeof(xfs_bmbt_rec_t); - else - fa.fsx_nextents = ip->i_d.di_anextents; - } else - fa.fsx_nextents = 0; - } else { - if (ip->i_df.if_flags & XFS_IFEXTENTS) - fa.fsx_nextents = ip->i_df.if_bytes / - sizeof(xfs_bmbt_rec_t); - else - fa.fsx_nextents = ip->i_d.di_nextents; - } - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - if (copy_to_user(arg, &fa, sizeof(fa))) - return -EFAULT; - return 0; -} - -STATIC void -xfs_set_diflags( - struct xfs_inode *ip, - unsigned int xflags) -{ - unsigned int di_flags; - - /* can't set PREALLOC this way, just preserve it */ - di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); - if (xflags & XFS_XFLAG_IMMUTABLE) - di_flags |= XFS_DIFLAG_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) - di_flags |= XFS_DIFLAG_APPEND; - if (xflags & XFS_XFLAG_SYNC) - di_flags |= XFS_DIFLAG_SYNC; - if (xflags & XFS_XFLAG_NOATIME) - di_flags |= XFS_DIFLAG_NOATIME; - if (xflags & XFS_XFLAG_NODUMP) - di_flags |= XFS_DIFLAG_NODUMP; - if (xflags & XFS_XFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - if (xflags & XFS_XFLAG_NODEFRAG) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (xflags & XFS_XFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - if (S_ISDIR(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (xflags & XFS_XFLAG_NOSYMLINKS) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (xflags & XFS_XFLAG_EXTSZINHERIT) - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - } else if (S_ISREG(ip->i_d.di_mode)) { - if (xflags & XFS_XFLAG_REALTIME) - di_flags |= XFS_DIFLAG_REALTIME; - if (xflags & XFS_XFLAG_EXTSIZE) - di_flags |= XFS_DIFLAG_EXTSIZE; - } - - ip->i_d.di_flags = di_flags; -} - -STATIC void -xfs_diflags_to_linux( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - unsigned int xflags = xfs_ip2xflags(ip); - - if (xflags & XFS_XFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (xflags & XFS_XFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (xflags & XFS_XFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -} - -#define FSX_PROJID 1 -#define FSX_EXTSIZE 2 -#define FSX_XFLAGS 4 -#define FSX_NONBLOCK 8 - -STATIC int -xfs_ioctl_setattr( - xfs_inode_t *ip, - struct fsxattr *fa, - int mask) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - unsigned int lock_flags = 0; - struct xfs_dquot *udqp = NULL; - struct xfs_dquot *gdqp = NULL; - struct xfs_dquot *olddquot = NULL; - int code; - - trace_xfs_ioctl_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - /* - * Disallow 32bit project ids when projid32bit feature is not enabled. - */ - if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && - !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. - * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { - code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, - ip->i_d.di_gid, fa->fsx_projid, - XFS_QMOPT_PQUOTA, &udqp, &gdqp); - if (code) - return code; - } - - /* - * For the other attributes, we acquire the inode lock and - * first do an error checking pass. - */ - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (code) - goto error_return; - - lock_flags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * CAP_FOWNER overrides the following restrictions: - * - * The user ID of the calling process must be equal - * to the file owner ID, except in cases where the - * CAP_FSETID capability is applicable. - */ - if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - - /* - * Do a quota reservation only if projid is actually going to change. - */ - if (mask & FSX_PROJID) { - if (XFS_IS_QUOTA_RUNNING(mp) && - XFS_IS_PQUOTA_ON(mp) && - xfs_get_projid(ip) != fa->fsx_projid) { - ASSERT(tp); - code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (code) /* out of quota */ - goto error_return; - } - } - - if (mask & FSX_EXTSIZE) { - /* - * Can't change extent size if any extents are allocated. - */ - if (ip->i_d.di_nextents && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - fa->fsx_extsize)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - - /* - * Extent size must be a multiple of the appropriate block - * size, if set at all. It must also be smaller than the - * maximum extent size supported by the filesystem. - * - * Also, for non-realtime files, limit the extent size hint to - * half the size of the AGs in the filesystem so alignment - * doesn't result in extents larger than an AG. - */ - if (fa->fsx_extsize != 0) { - xfs_extlen_t size; - xfs_fsblock_t extsize_fsb; - - extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); - if (extsize_fsb > MAXEXTLEN) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - - if (XFS_IS_REALTIME_INODE(ip) || - ((mask & FSX_XFLAGS) && - (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { - size = mp->m_sb.sb_rextsize << - mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - - if (fa->fsx_extsize % size) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - } - - - if (mask & FSX_XFLAGS) { - /* - * Can't change realtime flag if any extents are allocated. - */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (XFS_IS_REALTIME_INODE(ip)) != - (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - - /* - * If realtime flag is set then must have realtime data. - */ - if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { - if ((mp->m_sb.sb_rblocks == 0) || - (mp->m_sb.sb_rextsize == 0) || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if ((ip->i_d.di_flags & - (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || - (fa->fsx_xflags & - (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & FSX_PROJID) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (xfs_get_projid(ip) != fa->fsx_projid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { - olddquot = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - xfs_set_projid(ip, fa->fsx_projid); - - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == 1) - xfs_bump_ino_vers2(tp, ip); - } - - } - - if (mask & FSX_EXTSIZE) - ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; - if (mask & FSX_XFLAGS) { - xfs_set_diflags(ip, fa->fsx_xflags); - xfs_diflags_to_linux(ip); - } - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - /* - * If this is a synchronous mount, make sure that the - * transaction goes to disk before returning to the user. - * This is slightly sub-optimal in that truncates require - * two sync transactions instead of one for wsync filesystems. - * One for the truncate and one for the timestamps since we - * don't want to change the timestamps unless we're sure the - * truncate worked. Truncates are less than 1% of the laddis - * mix so this probably isn't worth the trouble to optimize. - */ - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - code = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, lock_flags); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - return code; - - error_return: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - xfs_trans_cancel(tp, 0); - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return code; -} - -STATIC int -xfs_ioc_fssetxattr( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - unsigned int mask; - - if (copy_from_user(&fa, arg, sizeof(fa))) - return -EFAULT; - - mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; - - return -xfs_ioctl_setattr(ip, &fa, mask); -} - -STATIC int -xfs_ioc_getxflags( - xfs_inode_t *ip, - void __user *arg) -{ - unsigned int flags; - - flags = xfs_di2lxflags(ip->i_d.di_flags); - if (copy_to_user(arg, &flags, sizeof(flags))) - return -EFAULT; - return 0; -} - -STATIC int -xfs_ioc_setxflags( - xfs_inode_t *ip, - struct file *filp, - void __user *arg) -{ - struct fsxattr fa; - unsigned int flags; - unsigned int mask; - - if (copy_from_user(&flags, arg, sizeof(flags))) - return -EFAULT; - - if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ - FS_NOATIME_FL | FS_NODUMP_FL | \ - FS_SYNC_FL)) - return -EOPNOTSUPP; - - mask = FSX_XFLAGS; - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - mask |= FSX_NONBLOCK; - fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - - return -xfs_ioctl_setattr(ip, &fa, mask); -} - -STATIC int -xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) -{ - struct getbmap __user *base = *ap; - - /* copy only getbmap portion (not getbmapx) */ - if (copy_to_user(base, bmv, sizeof(struct getbmap))) - return XFS_ERROR(EFAULT); - - *ap += sizeof(struct getbmap); - return 0; -} - -STATIC int -xfs_ioc_getbmap( - struct xfs_inode *ip, - int ioflags, - unsigned int cmd, - void __user *arg) -{ - struct getbmapx bmx; - int error; - - if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); - - if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); - - bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); - if (ioflags & IO_INVIS) - bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; - - error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, - (struct getbmap *)arg+1); - if (error) - return -error; - - /* copy back header - only size of getbmap */ - if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) -{ - struct getbmapx __user *base = *ap; - - if (copy_to_user(base, bmv, sizeof(struct getbmapx))) - return XFS_ERROR(EFAULT); - - *ap += sizeof(struct getbmapx); - return 0; -} - -STATIC int -xfs_ioc_getbmapx( - struct xfs_inode *ip, - void __user *arg) -{ - struct getbmapx bmx; - int error; - - if (copy_from_user(&bmx, arg, sizeof(bmx))) - return -XFS_ERROR(EFAULT); - - if (bmx.bmv_count < 2) - return -XFS_ERROR(EINVAL); - - if (bmx.bmv_iflags & (~BMV_IF_VALID)) - return -XFS_ERROR(EINVAL); - - error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, - (struct getbmapx *)arg+1); - if (error) - return -error; - - /* copy back header */ - if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) - return -XFS_ERROR(EFAULT); - - return 0; -} - -/* - * Note: some of the ioctl's return positive numbers as a - * byte count indicating success, such as readlink_by_handle. - * So we don't "sign flip" like most other routines. This means - * true errors need to be returned as a negative value. - */ -long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; - int ioflags = 0; - int error; - - if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - trace_xfs_file_ioctl(ip); - - switch (cmd) { - case FITRIM: - return xfs_ioc_trim(mp, arg); - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_ZERO_RANGE: { - xfs_flock64_t bf; - - if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); - } - case XFS_IOC_DIOINFO: { - struct dioattr da; - xfs_buftarg_t *target = - XFS_IS_REALTIME_INODE(ip) ? - mp->m_rtdev_targp : mp->m_ddev_targp; - - da.d_mem = da.d_miniosz = 1 << target->bt_sshift; - da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); - - if (copy_to_user(arg, &da, sizeof(da))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_FSBULKSTAT_SINGLE: - case XFS_IOC_FSBULKSTAT: - case XFS_IOC_FSINUMBERS: - return xfs_ioc_bulkstat(mp, cmd, arg); - - case XFS_IOC_FSGEOMETRY_V1: - return xfs_ioc_fsgeometry_v1(mp, arg); - - case XFS_IOC_FSGEOMETRY: - return xfs_ioc_fsgeometry(mp, arg); - - case XFS_IOC_GETVERSION: - return put_user(inode->i_generation, (int __user *)arg); - - case XFS_IOC_FSGETXATTR: - return xfs_ioc_fsgetxattr(ip, 0, arg); - case XFS_IOC_FSGETXATTRA: - return xfs_ioc_fsgetxattr(ip, 1, arg); - case XFS_IOC_FSSETXATTR: - return xfs_ioc_fssetxattr(ip, filp, arg); - case XFS_IOC_GETXFLAGS: - return xfs_ioc_getxflags(ip, arg); - case XFS_IOC_SETXFLAGS: - return xfs_ioc_setxflags(ip, filp, arg); - - case XFS_IOC_FSSETDM: { - struct fsdmidata dmi; - - if (copy_from_user(&dmi, arg, sizeof(dmi))) - return -XFS_ERROR(EFAULT); - - error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, - dmi.fsd_dmstate); - return -error; - } - - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - return xfs_ioc_getbmap(ip, ioflags, cmd, arg); - - case XFS_IOC_GETBMAPX: - return xfs_ioc_getbmapx(ip, arg); - - case XFS_IOC_FD_TO_HANDLE: - case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); - return xfs_find_handle(cmd, &hreq); - } - case XFS_IOC_OPEN_BY_HANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(filp, &hreq); - } - case XFS_IOC_FSSETDM_BY_HANDLE: - return xfs_fssetdm_by_handle(filp, arg); - - case XFS_IOC_READLINK_BY_HANDLE: { - xfs_fsop_handlereq_t hreq; - - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(filp, &hreq); - } - case XFS_IOC_ATTRLIST_BY_HANDLE: - return xfs_attrlist_by_handle(filp, arg); - - case XFS_IOC_ATTRMULTI_BY_HANDLE: - return xfs_attrmulti_by_handle(filp, arg); - - case XFS_IOC_SWAPEXT: { - struct xfs_swapext sxp; - - if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } - - case XFS_IOC_FSCOUNTS: { - xfs_fsop_counts_t out; - - error = xfs_fs_counts(mp, &out); - if (error) - return -error; - - if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_SET_RESBLKS: { - xfs_fsop_resblks_t inout; - __uint64_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return -XFS_ERROR(EROFS); - - if (copy_from_user(&inout, arg, sizeof(inout))) - return -XFS_ERROR(EFAULT); - - /* input parameter is passed in resblks field of structure */ - in = inout.resblks; - error = xfs_reserve_blocks(mp, &in, &inout); - if (error) - return -error; - - if (copy_to_user(arg, &inout, sizeof(inout))) - return -XFS_ERROR(EFAULT); - return 0; - } - - case XFS_IOC_GET_RESBLKS: { - xfs_fsop_resblks_t out; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - error = xfs_reserve_blocks(mp, NULL, &out); - if (error) - return -error; - - if (copy_to_user(arg, &out, sizeof(out))) - return -XFS_ERROR(EFAULT); - - return 0; - } - - case XFS_IOC_FSGROWFSDATA: { - xfs_growfs_data_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_data(mp, &in); - return -error; - } - - case XFS_IOC_FSGROWFSLOG: { - xfs_growfs_log_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_log(mp, &in); - return -error; - } - - case XFS_IOC_FSGROWFSRT: { - xfs_growfs_rt_t in; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_growfs_rt(mp, &in); - return -error; - } - - case XFS_IOC_GOINGDOWN: { - __uint32_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (get_user(in, (__uint32_t __user *)arg)) - return -XFS_ERROR(EFAULT); - - error = xfs_fs_goingdown(mp, in); - return -error; - } - - case XFS_IOC_ERROR_INJECTION: { - xfs_error_injection_t in; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (copy_from_user(&in, arg, sizeof(in))) - return -XFS_ERROR(EFAULT); - - error = xfs_errortag_add(in.errtag, mp); - return -error; - } - - case XFS_IOC_ERROR_CLEARALL: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - error = xfs_errortag_clearall(mp, 1); - return -error; - - default: - return -ENOTTY; - } -} diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h deleted file mode 100644 index d56173b34a2a..000000000000 --- a/fs/xfs/linux-2.6/xfs_ioctl.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2008 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOCTL_H__ -#define __XFS_IOCTL_H__ - -extern int -xfs_ioc_space( - struct xfs_inode *ip, - struct inode *inode, - struct file *filp, - int ioflags, - unsigned int cmd, - xfs_flock64_t *bf); - -extern int -xfs_find_handle( - unsigned int cmd, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_open_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_readlink_by_handle( - struct file *parfilp, - xfs_fsop_handlereq_t *hreq); - -extern int -xfs_attrmulti_attr_get( - struct inode *inode, - unsigned char *name, - unsigned char __user *ubuf, - __uint32_t *len, - __uint32_t flags); - -extern int -xfs_attrmulti_attr_set( - struct inode *inode, - unsigned char *name, - const unsigned char __user *ubuf, - __uint32_t len, - __uint32_t flags); - -extern int -xfs_attrmulti_attr_remove( - struct inode *inode, - unsigned char *name, - __uint32_t flags); - -extern struct dentry * -xfs_handle_to_dentry( - struct file *parfilp, - void __user *uhandle, - u32 hlen); - -extern long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p); - -extern long -xfs_file_compat_ioctl( - struct file *file, - unsigned int cmd, - unsigned long arg); - -#endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c deleted file mode 100644 index 54e623bfbb85..000000000000 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include -#include -#include -#include -#include -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_vnode.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_error.h" -#include "xfs_dfrag.h" -#include "xfs_vnodeops.h" -#include "xfs_fsops.h" -#include "xfs_alloc.h" -#include "xfs_rtalloc.h" -#include "xfs_attr.h" -#include "xfs_ioctl.h" -#include "xfs_ioctl32.h" -#include "xfs_trace.h" - -#define _NATIVE_IOC(cmd, type) \ - _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) - -#ifdef BROKEN_X86_ALIGNMENT -STATIC int -xfs_compat_flock64_copyin( - xfs_flock64_t *bf, - compat_xfs_flock64_t __user *arg32) -{ - if (get_user(bf->l_type, &arg32->l_type) || - get_user(bf->l_whence, &arg32->l_whence) || - get_user(bf->l_start, &arg32->l_start) || - get_user(bf->l_len, &arg32->l_len) || - get_user(bf->l_sysid, &arg32->l_sysid) || - get_user(bf->l_pid, &arg32->l_pid) || - copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_ioc_fsgeometry_v1( - struct xfs_mount *mp, - compat_xfs_fsop_geom_v1_t __user *arg32) -{ - xfs_fsop_geom_t fsgeo; - int error; - - error = xfs_fs_geometry(mp, &fsgeo, 3); - if (error) - return -error; - /* The 32-bit variant simply has some padding at the end */ - if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_growfs_data_copyin( - struct xfs_growfs_data *in, - compat_xfs_growfs_data_t __user *arg32) -{ - if (get_user(in->newblocks, &arg32->newblocks) || - get_user(in->imaxpct, &arg32->imaxpct)) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_compat_growfs_rt_copyin( - struct xfs_growfs_rt *in, - compat_xfs_growfs_rt_t __user *arg32) -{ - if (get_user(in->newblocks, &arg32->newblocks) || - get_user(in->extsize, &arg32->extsize)) - return -XFS_ERROR(EFAULT); - return 0; -} - -STATIC int -xfs_inumbers_fmt_compat( - void __user *ubuffer, - const xfs_inogrp_t *buffer, - long count, - long *written) -{ - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; - - for (i = 0; i < count; i++) { - if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || - put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || - put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -XFS_ERROR(EFAULT); - } - *written = count * sizeof(*p32); - return 0; -} - -#else -#define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#endif /* BROKEN_X86_ALIGNMENT */ - -STATIC int -xfs_ioctl32_bstime_copyin( - xfs_bstime_t *bstime, - compat_xfs_bstime_t __user *bstime32) -{ - compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ - - if (get_user(sec32, &bstime32->tv_sec) || - get_user(bstime->tv_nsec, &bstime32->tv_nsec)) - return -XFS_ERROR(EFAULT); - bstime->tv_sec = sec32; - return 0; -} - -/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ -STATIC int -xfs_ioctl32_bstat_copyin( - xfs_bstat_t *bstat, - compat_xfs_bstat_t __user *bstat32) -{ - if (get_user(bstat->bs_ino, &bstat32->bs_ino) || - get_user(bstat->bs_mode, &bstat32->bs_mode) || - get_user(bstat->bs_nlink, &bstat32->bs_nlink) || - get_user(bstat->bs_uid, &bstat32->bs_uid) || - get_user(bstat->bs_gid, &bstat32->bs_gid) || - get_user(bstat->bs_rdev, &bstat32->bs_rdev) || - get_user(bstat->bs_blksize, &bstat32->bs_blksize) || - get_user(bstat->bs_size, &bstat32->bs_size) || - xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || - xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || - xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || - get_user(bstat->bs_blocks, &bstat32->bs_size) || - get_user(bstat->bs_xflags, &bstat32->bs_size) || - get_user(bstat->bs_extsize, &bstat32->bs_extsize) || - get_user(bstat->bs_extents, &bstat32->bs_extents) || - get_user(bstat->bs_gen, &bstat32->bs_gen) || - get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || - get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || - get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || - get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || - get_user(bstat->bs_aextents, &bstat32->bs_aextents)) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* XFS_IOC_FSBULKSTAT and friends */ - -STATIC int -xfs_bstime_store_compat( - compat_xfs_bstime_t __user *p32, - const xfs_bstime_t *p) -{ - __s32 sec32; - - sec32 = p->tv_sec; - if (put_user(sec32, &p32->tv_sec) || - put_user(p->tv_nsec, &p32->tv_nsec)) - return -XFS_ERROR(EFAULT); - return 0; -} - -/* Return 0 on success or positive error (to xfs_bulkstat()) */ -STATIC int -xfs_bulkstat_one_fmt_compat( - void __user *ubuffer, - int ubsize, - int *ubused, - const xfs_bstat_t *buffer) -{ - compat_xfs_bstat_t __user *p32 = ubuffer; - - if (ubsize < sizeof(*p32)) - return XFS_ERROR(ENOMEM); - - if (put_user(buffer->bs_ino, &p32->bs_ino) || - put_user(buffer->bs_mode, &p32->bs_mode) || - put_user(buffer->bs_nlink, &p32->bs_nlink) || - put_user(buffer->bs_uid, &p32->bs_uid) || - put_user(buffer->bs_gid, &p32->bs_gid) || - put_user(buffer->bs_rdev, &p32->bs_rdev) || - put_user(buffer->bs_blksize, &p32->bs_blksize) || - put_user(buffer->bs_size, &p32->bs_size) || - xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || - xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || - xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || - put_user(buffer->bs_blocks, &p32->bs_blocks) || - put_user(buffer->bs_xflags, &p32->bs_xflags) || - put_user(buffer->bs_extsize, &p32->bs_extsize) || - put_user(buffer->bs_extents, &p32->bs_extents) || - put_user(buffer->bs_gen, &p32->bs_gen) || - put_user(buffer->bs_projid, &p32->bs_projid) || - put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || - put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || - put_user(buffer->bs_dmstate, &p32->bs_dmstate) || - put_user(buffer->bs_aextents, &p32->bs_aextents)) - return XFS_ERROR(EFAULT); - if (ubused) - *ubused = sizeof(*p32); - return 0; -} - -STATIC int -xfs_bulkstat_one_compat( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* buffer to place output in */ - int ubsize, /* size of buffer */ - int *ubused, /* bytes used by me */ - int *stat) /* BULKSTAT_RV_... */ -{ - return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, - xfs_bulkstat_one_fmt_compat, - ubused, stat); -} - -/* copied from xfs_ioctl.c */ -STATIC int -xfs_compat_ioc_bulkstat( - xfs_mount_t *mp, - unsigned int cmd, - compat_xfs_fsop_bulkreq_t __user *p32) -{ - u32 addr; - xfs_fsop_bulkreq_t bulkreq; - int count; /* # of records returned */ - xfs_ino_t inlast; /* last inode number */ - int done; - int error; - - /* done = 1 if there are more stats to get and if bulkstat */ - /* should be called again (unused here, but used in dmapi) */ - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -XFS_ERROR(EIO); - - if (get_user(addr, &p32->lastip)) - return -XFS_ERROR(EFAULT); - bulkreq.lastip = compat_ptr(addr); - if (get_user(bulkreq.icount, &p32->icount) || - get_user(addr, &p32->ubuffer)) - return -XFS_ERROR(EFAULT); - bulkreq.ubuffer = compat_ptr(addr); - if (get_user(addr, &p32->ocount)) - return -XFS_ERROR(EFAULT); - bulkreq.ocount = compat_ptr(addr); - - if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) - return -XFS_ERROR(EFAULT); - - if ((count = bulkreq.icount) <= 0) - return -XFS_ERROR(EINVAL); - - if (bulkreq.ubuffer == NULL) - return -XFS_ERROR(EINVAL); - - if (cmd == XFS_IOC_FSINUMBERS_32) { - error = xfs_inumbers(mp, &inlast, &count, - bulkreq.ubuffer, xfs_inumbers_fmt_compat); - } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { - int res; - - error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, - sizeof(compat_xfs_bstat_t), 0, &res); - } else if (cmd == XFS_IOC_FSBULKSTAT_32) { - error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), - bulkreq.ubuffer, &done); - } else - error = XFS_ERROR(EINVAL); - if (error) - return -error; - - if (bulkreq.ocount != NULL) { - if (copy_to_user(bulkreq.lastip, &inlast, - sizeof(xfs_ino_t))) - return -XFS_ERROR(EFAULT); - - if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) - return -XFS_ERROR(EFAULT); - } - - return 0; -} - -STATIC int -xfs_compat_handlereq_copyin( - xfs_fsop_handlereq_t *hreq, - compat_xfs_fsop_handlereq_t __user *arg32) -{ - compat_xfs_fsop_handlereq_t hreq32; - - if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - - hreq->fd = hreq32.fd; - hreq->path = compat_ptr(hreq32.path); - hreq->oflags = hreq32.oflags; - hreq->ihandle = compat_ptr(hreq32.ihandle); - hreq->ihandlen = hreq32.ihandlen; - hreq->ohandle = compat_ptr(hreq32.ohandle); - hreq->ohandlen = compat_ptr(hreq32.ohandlen); - - return 0; -} - -STATIC struct dentry * -xfs_compat_handlereq_to_dentry( - struct file *parfilp, - compat_xfs_fsop_handlereq_t *hreq) -{ - return xfs_handle_to_dentry(parfilp, - compat_ptr(hreq->ihandle), hreq->ihandlen); -} - -STATIC int -xfs_compat_attrlist_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - attrlist_cursor_kern_t *cursor; - compat_xfs_fsop_attrlist_handlereq_t al_hreq; - struct dentry *dentry; - char *kbuf; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&al_hreq, arg, - sizeof(compat_xfs_fsop_attrlist_handlereq_t))) - return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) - return -XFS_ERROR(EINVAL); - - /* - * Reject flags, only allow namespaces. - */ - if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) - return -XFS_ERROR(EINVAL); - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = -ENOMEM; - kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); - if (!kbuf) - goto out_dput; - - cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; - error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, - al_hreq.flags, cursor); - if (error) - goto out_kfree; - - if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) - error = -EFAULT; - - out_kfree: - kfree(kbuf); - out_dput: - dput(dentry); - return error; -} - -STATIC int -xfs_compat_attrmulti_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - compat_xfs_attr_multiop_t *ops; - compat_xfs_fsop_attrmulti_handlereq_t am_hreq; - struct dentry *dentry; - unsigned int i, size; - unsigned char *attr_name; - - if (!capable(CAP_SYS_ADMIN)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&am_hreq, arg, - sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) - return -XFS_ERROR(EFAULT); - - /* overflow check */ - if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) - return -E2BIG; - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - error = E2BIG; - size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); - if (!size || size > 16 * PAGE_SIZE) - goto out_dput; - - ops = memdup_user(compat_ptr(am_hreq.ops), size); - if (IS_ERR(ops)) { - error = PTR_ERR(ops); - goto out_dput; - } - - attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); - if (!attr_name) - goto out_kfree_ops; - - error = 0; - for (i = 0; i < am_hreq.opcount; i++) { - ops[i].am_error = strncpy_from_user((char *)attr_name, - compat_ptr(ops[i].am_attrname), - MAXNAMELEN); - if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) - error = -ERANGE; - if (ops[i].am_error < 0) - break; - - switch (ops[i].am_opcode) { - case ATTR_OP_GET: - ops[i].am_error = xfs_attrmulti_attr_get( - dentry->d_inode, attr_name, - compat_ptr(ops[i].am_attrvalue), - &ops[i].am_length, ops[i].am_flags); - break; - case ATTR_OP_SET: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_set( - dentry->d_inode, attr_name, - compat_ptr(ops[i].am_attrvalue), - ops[i].am_length, ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - case ATTR_OP_REMOVE: - ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); - if (ops[i].am_error) - break; - ops[i].am_error = xfs_attrmulti_attr_remove( - dentry->d_inode, attr_name, - ops[i].am_flags); - mnt_drop_write(parfilp->f_path.mnt); - break; - default: - ops[i].am_error = EINVAL; - } - } - - if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) - error = XFS_ERROR(EFAULT); - - kfree(attr_name); - out_kfree_ops: - kfree(ops); - out_dput: - dput(dentry); - return -error; -} - -STATIC int -xfs_compat_fssetdm_by_handle( - struct file *parfilp, - void __user *arg) -{ - int error; - struct fsdmidata fsd; - compat_xfs_fsop_setdm_handlereq_t dmhreq; - struct dentry *dentry; - - if (!capable(CAP_MKNOD)) - return -XFS_ERROR(EPERM); - if (copy_from_user(&dmhreq, arg, - sizeof(compat_xfs_fsop_setdm_handlereq_t))) - return -XFS_ERROR(EFAULT); - - dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - - if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { - error = -XFS_ERROR(EPERM); - goto out; - } - - if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { - error = -XFS_ERROR(EFAULT); - goto out; - } - - error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, - fsd.fsd_dmstate); - -out: - dput(dentry); - return error; -} - -long -xfs_file_compat_ioctl( - struct file *filp, - unsigned cmd, - unsigned long p) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - void __user *arg = (void __user *)p; - int ioflags = 0; - int error; - - if (filp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - trace_xfs_file_compat_ioctl(ip); - - switch (cmd) { - /* No size or alignment issues on any arch */ - case XFS_IOC_DIOINFO: - case XFS_IOC_FSGEOMETRY: - case XFS_IOC_FSGETXATTR: - case XFS_IOC_FSSETXATTR: - case XFS_IOC_FSGETXATTRA: - case XFS_IOC_FSSETDM: - case XFS_IOC_GETBMAP: - case XFS_IOC_GETBMAPA: - case XFS_IOC_GETBMAPX: - case XFS_IOC_FSCOUNTS: - case XFS_IOC_SET_RESBLKS: - case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_GOINGDOWN: - case XFS_IOC_ERROR_INJECTION: - case XFS_IOC_ERROR_CLEARALL: - return xfs_file_ioctl(filp, cmd, p); -#ifndef BROKEN_X86_ALIGNMENT - /* These are handled fine if no alignment issues */ - case XFS_IOC_ALLOCSP: - case XFS_IOC_FREESP: - case XFS_IOC_RESVSP: - case XFS_IOC_UNRESVSP: - case XFS_IOC_ALLOCSP64: - case XFS_IOC_FREESP64: - case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - case XFS_IOC_FSGEOMETRY_V1: - case XFS_IOC_FSGROWFSDATA: - case XFS_IOC_FSGROWFSRT: - case XFS_IOC_ZERO_RANGE: - return xfs_file_ioctl(filp, cmd, p); -#else - case XFS_IOC_ALLOCSP_32: - case XFS_IOC_FREESP_32: - case XFS_IOC_ALLOCSP64_32: - case XFS_IOC_FREESP64_32: - case XFS_IOC_RESVSP_32: - case XFS_IOC_UNRESVSP_32: - case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - case XFS_IOC_ZERO_RANGE_32: { - struct xfs_flock64 bf; - - if (xfs_compat_flock64_copyin(&bf, arg)) - return -XFS_ERROR(EFAULT); - cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); - } - case XFS_IOC_FSGEOMETRY_V1_32: - return xfs_compat_ioc_fsgeometry_v1(mp, arg); - case XFS_IOC_FSGROWFSDATA_32: { - struct xfs_growfs_data in; - - if (xfs_compat_growfs_data_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); - error = xfs_growfs_data(mp, &in); - return -error; - } - case XFS_IOC_FSGROWFSRT_32: { - struct xfs_growfs_rt in; - - if (xfs_compat_growfs_rt_copyin(&in, arg)) - return -XFS_ERROR(EFAULT); - error = xfs_growfs_rt(mp, &in); - return -error; - } -#endif - /* long changes size, but xfs only copiese out 32 bits */ - case XFS_IOC_GETXFLAGS_32: - case XFS_IOC_SETXFLAGS_32: - case XFS_IOC_GETVERSION_32: - cmd = _NATIVE_IOC(cmd, long); - return xfs_file_ioctl(filp, cmd, p); - case XFS_IOC_SWAPEXT_32: { - struct xfs_swapext sxp; - struct compat_xfs_swapext __user *sxu = arg; - - /* Bulk copy in up to the sx_stat field, then copy bstat */ - if (copy_from_user(&sxp, sxu, - offsetof(struct xfs_swapext, sx_stat)) || - xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) - return -XFS_ERROR(EFAULT); - error = xfs_swapext(&sxp); - return -error; - } - case XFS_IOC_FSBULKSTAT_32: - case XFS_IOC_FSBULKSTAT_SINGLE_32: - case XFS_IOC_FSINUMBERS_32: - return xfs_compat_ioc_bulkstat(mp, cmd, arg); - case XFS_IOC_FD_TO_HANDLE_32: - case XFS_IOC_PATH_TO_HANDLE_32: - case XFS_IOC_PATH_TO_FSHANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); - return xfs_find_handle(cmd, &hreq); - } - case XFS_IOC_OPEN_BY_HANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - return xfs_open_by_handle(filp, &hreq); - } - case XFS_IOC_READLINK_BY_HANDLE_32: { - struct xfs_fsop_handlereq hreq; - - if (xfs_compat_handlereq_copyin(&hreq, arg)) - return -XFS_ERROR(EFAULT); - return xfs_readlink_by_handle(filp, &hreq); - } - case XFS_IOC_ATTRLIST_BY_HANDLE_32: - return xfs_compat_attrlist_by_handle(filp, arg); - case XFS_IOC_ATTRMULTI_BY_HANDLE_32: - return xfs_compat_attrmulti_by_handle(filp, arg); - case XFS_IOC_FSSETDM_BY_HANDLE_32: - return xfs_compat_fssetdm_by_handle(filp, arg); - default: - return -XFS_ERROR(ENOIOCTLCMD); - } -} diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h deleted file mode 100644 index 80f4060e8970..000000000000 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright (c) 2004-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOCTL32_H__ -#define __XFS_IOCTL32_H__ - -#include - -/* - * on 32-bit arches, ioctl argument structures may have different sizes - * and/or alignment. We define compat structures which match the - * 32-bit sizes/alignments here, and their associated ioctl numbers. - * - * xfs_ioctl32.c contains routines to copy these structures in and out. - */ - -/* stock kernel-level ioctls we support */ -#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS -#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS -#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION - -/* - * On intel, even if sizes match, alignment and/or padding may differ. - */ -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) -#define BROKEN_X86_ALIGNMENT -#define __compat_packed __attribute__((packed)) -#else -#define __compat_packed -#endif - -typedef struct compat_xfs_bstime { - compat_time_t tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} compat_xfs_bstime_t; - -typedef struct compat_xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - compat_xfs_bstime_t bs_atime; /* access time */ - compat_xfs_bstime_t bs_mtime; /* modify time */ - compat_xfs_bstime_t bs_ctime; /* inode change time */ - int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid_lo; /* lower part of project id */ -#define bs_projid bs_projid_lo /* (previously just bs_projid) */ - __u16 bs_projid_hi; /* high part of project id */ - unsigned char bs_pad[12]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} __compat_packed compat_xfs_bstat_t; - -typedef struct compat_xfs_fsop_bulkreq { - compat_uptr_t lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - compat_uptr_t ubuffer; /* user buffer for inode desc. */ - compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; - -#define XFS_IOC_FSBULKSTAT_32 \ - _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ - _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS_32 \ - _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) - -typedef struct compat_xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - compat_uptr_t path; /* user pathname */ - __u32 oflags; /* open flags */ - compat_uptr_t ihandle; /* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - compat_uptr_t ohandle; /* user buffer for handle */ - compat_uptr_t ohandlen; /* user buffer length */ -} compat_xfs_fsop_handlereq_t; - -#define XFS_IOC_PATH_TO_FSHANDLE_32 \ - _IOWR('X', 104, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE_32 \ - _IOWR('X', 105, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE_32 \ - _IOWR('X', 106, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE_32 \ - _IOWR('X', 107, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE_32 \ - _IOWR('X', 108, struct compat_xfs_fsop_handlereq) - -/* The bstat field in the swapext struct needs translation */ -typedef struct compat_xfs_swapext { - __int64_t sx_version; /* version */ - __int64_t sx_fdtarget; /* fd of target file */ - __int64_t sx_fdtmp; /* fd of tmp file */ - xfs_off_t sx_offset; /* offset into file */ - xfs_off_t sx_length; /* leng from offset */ - char sx_pad[16]; /* pad space, unused */ - compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ -} __compat_packed compat_xfs_swapext_t; - -#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) - -typedef struct compat_xfs_fsop_attrlist_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ - struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ - __u32 flags; /* which namespace to use */ - __u32 buflen; /* length of buffer supplied */ - compat_uptr_t buffer; /* returned names */ -} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; - -/* Note: actually this is read/write */ -#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ - _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) - -/* am_opcodes defined in xfs_fs.h */ -typedef struct compat_xfs_attr_multiop { - __u32 am_opcode; - __s32 am_error; - compat_uptr_t am_attrname; - compat_uptr_t am_attrvalue; - __u32 am_length; - __u32 am_flags; -} compat_xfs_attr_multiop_t; - -typedef struct compat_xfs_fsop_attrmulti_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ - __u32 opcount;/* count of following multiop */ - /* ptr to compat_xfs_attr_multiop */ - compat_uptr_t ops; /* attr_multi data */ -} compat_xfs_fsop_attrmulti_handlereq_t; - -#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ - _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) - -typedef struct compat_xfs_fsop_setdm_handlereq { - struct compat_xfs_fsop_handlereq hreq; /* handle information */ - /* ptr to struct fsdmidata */ - compat_uptr_t data; /* DMAPI data */ -} compat_xfs_fsop_setdm_handlereq_t; - -#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ - _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) - -#ifdef BROKEN_X86_ALIGNMENT -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct compat_xfs_flock64 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - /* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} compat_xfs_flock64_t; - -#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) -#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) -#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) -#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) -#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) -#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) -#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) -#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) - -typedef struct compat_xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; - -#define XFS_IOC_FSGEOMETRY_V1_32 \ - _IOR('X', 100, struct compat_xfs_fsop_geom_v1) - -typedef struct compat_xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; - -/* These growfs input structures have padding on the end, so must translate */ -typedef struct compat_xfs_growfs_data { - __u64 newblocks; /* new data subvol size, fsblocks */ - __u32 imaxpct; /* new inode space percentage limit */ -} __attribute__((packed)) compat_xfs_growfs_data_t; - -typedef struct compat_xfs_growfs_rt { - __u64 newblocks; /* new realtime size, fsblocks */ - __u32 extsize; /* new realtime extent size, fsblocks */ -} __attribute__((packed)) compat_xfs_growfs_rt_t; - -#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) -#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) - -#endif /* BROKEN_X86_ALIGNMENT */ - -#endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c deleted file mode 100644 index b9c172b3fbbe..000000000000 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ /dev/null @@ -1,1210 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_acl.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_rw.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_vnodeops.h" -#include "xfs_inode_item.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include - -/* - * Bring the timestamps in the XFS inode uptodate. - * - * Used before writing the inode to disk. - */ -void -xfs_synchronize_times( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; - ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; -} - -/* - * If the linux inode is valid, mark it dirty. - * Used when committing a dirty inode into a transaction so that - * the inode will get written back by the linux code - */ -void -xfs_mark_inode_dirty_sync( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) - mark_inode_dirty_sync(inode); -} - -void -xfs_mark_inode_dirty( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) - mark_inode_dirty(inode); -} - -/* - * Hook in SELinux. This is not quite correct yet, what we really need - * here (as we do for default ACLs) is a mechanism by which creation of - * these attrs can be journalled at inode creation time (along with the - * inode, of course, such that log replay can't cause these to be lost). - */ -STATIC int -xfs_init_security( - struct inode *inode, - struct inode *dir, - const struct qstr *qstr) -{ - struct xfs_inode *ip = XFS_I(inode); - size_t length; - void *value; - unsigned char *name; - int error; - - error = security_inode_init_security(inode, dir, qstr, (char **)&name, - &value, &length); - if (error) { - if (error == -EOPNOTSUPP) - return 0; - return -error; - } - - error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); - - kfree(name); - kfree(value); - return error; -} - -static void -xfs_dentry_to_name( - struct xfs_name *namep, - struct dentry *dentry) -{ - namep->name = dentry->d_name.name; - namep->len = dentry->d_name.len; -} - -STATIC void -xfs_cleanup_inode( - struct inode *dir, - struct inode *inode, - struct dentry *dentry) -{ - struct xfs_name teardown; - - /* Oh, the horror. - * If we can't add the ACL or we fail in - * xfs_init_security we must back out. - * ENOSPC can hit here, among other things. - */ - xfs_dentry_to_name(&teardown, dentry); - - xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); - iput(inode); -} - -STATIC int -xfs_vn_mknod( - struct inode *dir, - struct dentry *dentry, - int mode, - dev_t rdev) -{ - struct inode *inode; - struct xfs_inode *ip = NULL; - struct posix_acl *default_acl = NULL; - struct xfs_name name; - int error; - - /* - * Irix uses Missed'em'V split, but doesn't want to see - * the upper 5 bits of (14bit) major. - */ - if (S_ISCHR(mode) || S_ISBLK(mode)) { - if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) - return -EINVAL; - rdev = sysv_encode_dev(rdev); - } else { - rdev = 0; - } - - if (IS_POSIXACL(dir)) { - default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); - if (IS_ERR(default_acl)) - return PTR_ERR(default_acl); - - if (!default_acl) - mode &= ~current_umask(); - } - - xfs_dentry_to_name(&name, dentry); - error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); - if (unlikely(error)) - goto out_free_acl; - - inode = VFS_I(ip); - - error = xfs_init_security(inode, dir, &dentry->d_name); - if (unlikely(error)) - goto out_cleanup_inode; - - if (default_acl) { - error = -xfs_inherit_acl(inode, default_acl); - default_acl = NULL; - if (unlikely(error)) - goto out_cleanup_inode; - } - - - d_instantiate(dentry, inode); - return -error; - - out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); - out_free_acl: - posix_acl_release(default_acl); - return -error; -} - -STATIC int -xfs_vn_create( - struct inode *dir, - struct dentry *dentry, - int mode, - struct nameidata *nd) -{ - return xfs_vn_mknod(dir, dentry, mode, 0); -} - -STATIC int -xfs_vn_mkdir( - struct inode *dir, - struct dentry *dentry, - int mode) -{ - return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); -} - -STATIC struct dentry * -xfs_vn_lookup( - struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) -{ - struct xfs_inode *cip; - struct xfs_name name; - int error; - - if (dentry->d_name.len >= MAXNAMELEN) - return ERR_PTR(-ENAMETOOLONG); - - xfs_dentry_to_name(&name, dentry); - error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); - if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); - d_add(dentry, NULL); - return NULL; - } - - return d_splice_alias(VFS_I(cip), dentry); -} - -STATIC struct dentry * -xfs_vn_ci_lookup( - struct inode *dir, - struct dentry *dentry, - struct nameidata *nd) -{ - struct xfs_inode *ip; - struct xfs_name xname; - struct xfs_name ci_name; - struct qstr dname; - int error; - - if (dentry->d_name.len >= MAXNAMELEN) - return ERR_PTR(-ENAMETOOLONG); - - xfs_dentry_to_name(&xname, dentry); - error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); - if (unlikely(error)) { - if (unlikely(error != ENOENT)) - return ERR_PTR(-error); - /* - * call d_add(dentry, NULL) here when d_drop_negative_children - * is called in xfs_vn_mknod (ie. allow negative dentries - * with CI filesystems). - */ - return NULL; - } - - /* if exact match, just splice and exit */ - if (!ci_name.name) - return d_splice_alias(VFS_I(ip), dentry); - - /* else case-insensitive match... */ - dname.name = ci_name.name; - dname.len = ci_name.len; - dentry = d_add_ci(dentry, VFS_I(ip), &dname); - kmem_free(ci_name.name); - return dentry; -} - -STATIC int -xfs_vn_link( - struct dentry *old_dentry, - struct inode *dir, - struct dentry *dentry) -{ - struct inode *inode = old_dentry->d_inode; - struct xfs_name name; - int error; - - xfs_dentry_to_name(&name, dentry); - - error = xfs_link(XFS_I(dir), XFS_I(inode), &name); - if (unlikely(error)) - return -error; - - ihold(inode); - d_instantiate(dentry, inode); - return 0; -} - -STATIC int -xfs_vn_unlink( - struct inode *dir, - struct dentry *dentry) -{ - struct xfs_name name; - int error; - - xfs_dentry_to_name(&name, dentry); - - error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); - if (error) - return error; - - /* - * With unlink, the VFS makes the dentry "negative": no inode, - * but still hashed. This is incompatible with case-insensitive - * mode, so invalidate (unhash) the dentry in CI-mode. - */ - if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) - d_invalidate(dentry); - return 0; -} - -STATIC int -xfs_vn_symlink( - struct inode *dir, - struct dentry *dentry, - const char *symname) -{ - struct inode *inode; - struct xfs_inode *cip = NULL; - struct xfs_name name; - int error; - mode_t mode; - - mode = S_IFLNK | - (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); - xfs_dentry_to_name(&name, dentry); - - error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); - if (unlikely(error)) - goto out; - - inode = VFS_I(cip); - - error = xfs_init_security(inode, dir, &dentry->d_name); - if (unlikely(error)) - goto out_cleanup_inode; - - d_instantiate(dentry, inode); - return 0; - - out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry); - out: - return -error; -} - -STATIC int -xfs_vn_rename( - struct inode *odir, - struct dentry *odentry, - struct inode *ndir, - struct dentry *ndentry) -{ - struct inode *new_inode = ndentry->d_inode; - struct xfs_name oname; - struct xfs_name nname; - - xfs_dentry_to_name(&oname, odentry); - xfs_dentry_to_name(&nname, ndentry); - - return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), - XFS_I(ndir), &nname, new_inode ? - XFS_I(new_inode) : NULL); -} - -/* - * careful here - this function can get called recursively, so - * we need to be very careful about how much stack we use. - * uio is kmalloced for this reason... - */ -STATIC void * -xfs_vn_follow_link( - struct dentry *dentry, - struct nameidata *nd) -{ - char *link; - int error = -ENOMEM; - - link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); - if (!link) - goto out_err; - - error = -xfs_readlink(XFS_I(dentry->d_inode), link); - if (unlikely(error)) - goto out_kfree; - - nd_set_link(nd, link); - return NULL; - - out_kfree: - kfree(link); - out_err: - nd_set_link(nd, ERR_PTR(error)); - return NULL; -} - -STATIC void -xfs_vn_put_link( - struct dentry *dentry, - struct nameidata *nd, - void *p) -{ - char *s = nd_get_link(nd); - - if (!IS_ERR(s)) - kfree(s); -} - -STATIC int -xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) -{ - struct inode *inode = dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - - trace_xfs_getattr(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - stat->size = XFS_ISIZE(ip); - stat->dev = inode->i_sb->s_dev; - stat->mode = ip->i_d.di_mode; - stat->nlink = ip->i_d.di_nlink; - stat->uid = ip->i_d.di_uid; - stat->gid = ip->i_d.di_gid; - stat->ino = ip->i_ino; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; - stat->blocks = - XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); - - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - stat->blksize = BLKDEV_IOSIZE; - stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - if (XFS_IS_REALTIME_INODE(ip)) { - /* - * If the file blocks are being allocated from a - * realtime volume, then return the inode's realtime - * extent size or the realtime volume's extent size. - */ - stat->blksize = - xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; - } else - stat->blksize = xfs_preferred_iosize(mp); - stat->rdev = 0; - break; - } - - return 0; -} - -int -xfs_setattr_nonsize( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - xfs_mount_t *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - xfs_trans_t *tp; - int error; - uid_t uid = 0, iuid = 0; - gid_t gid = 0, igid = 0; - struct xfs_dquot *udqp = NULL, *gdqp = NULL; - struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT((mask & ATTR_SIZE) == 0); - - /* - * If disk quotas is on, we make sure that the dquots do exist on disk, - * before we start any other transactions. Trying to do this later - * is messy. We don't care to take a readlock to look at the ids - * in inode here, because we can't hold it across the trans_reserve. - * If the IDs do change before we take the ilock, we're covered - * because the i_*dquot fields will get updated anyway. - */ - if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { - uint qflags = 0; - - if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = iattr->ia_uid; - qflags |= XFS_QMOPT_UQUOTA; - } else { - uid = ip->i_d.di_uid; - } - if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = iattr->ia_gid; - qflags |= XFS_QMOPT_GQUOTA; - } else { - gid = ip->i_d.di_gid; - } - - /* - * We take a reference when we initialize udqp and gdqp, - * so it is important that we never blindly double trip on - * the same variable. See xfs_create() for an example. - */ - ASSERT(udqp == NULL); - ASSERT(gdqp == NULL); - error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), - qflags, &udqp, &gdqp); - if (error) - return error; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); - if (error) - goto out_dqrele; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * These IDs could have changed since we last looked at them. - * But, we're assured that if the ownership did change - * while we didn't have the inode locked, inode's dquot(s) - * would have changed also. - */ - iuid = ip->i_d.di_uid; - igid = ip->i_d.di_gid; - gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; - uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; - - /* - * Do a quota reservation only if uid/gid is actually - * going to change. - */ - if (XFS_IS_QUOTA_RUNNING(mp) && - ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { - ASSERT(tp); - error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, - capable(CAP_FOWNER) ? - XFS_QMOPT_FORCE_RES : 0); - if (error) /* out of quota */ - goto out_trans_cancel; - } - } - - xfs_trans_ijoin(tp, ip); - - /* - * Change file ownership. Must be the owner or privileged. - */ - if (mask & (ATTR_UID|ATTR_GID)) { - /* - * CAP_FSETID overrides the following restrictions: - * - * The set-user-ID and set-group-ID bits of a file will be - * cleared upon successful return from chown() - */ - if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && - !capable(CAP_FSETID)) - ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); - - /* - * Change the ownerships and register quota modifications - * in the transaction. - */ - if (iuid != uid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & ATTR_UID); - ASSERT(udqp); - olddquot1 = xfs_qm_vop_chown(tp, ip, - &ip->i_udquot, udqp); - } - ip->i_d.di_uid = uid; - inode->i_uid = uid; - } - if (igid != gid) { - if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { - ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & ATTR_GID); - ASSERT(gdqp); - olddquot2 = xfs_qm_vop_chown(tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_gid = gid; - inode->i_gid = gid; - } - } - - /* - * Change file access modes. - */ - if (mask & ATTR_MODE) { - umode_t mode = iattr->ia_mode; - - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - mode &= ~S_ISGID; - - ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= mode & ~S_IFMT; - - inode->i_mode &= S_IFMT; - inode->i_mode |= mode & ~S_IFMT; - } - - /* - * Change file access or modified times. - */ - if (mask & ATTR_ATIME) { - inode->i_atime = iattr->ia_atime; - ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; - ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - /* - * Release any dquot(s) the inode had kept before chown. - */ - xfs_qm_dqrele(olddquot1); - xfs_qm_dqrele(olddquot2); - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - - if (error) - return XFS_ERROR(error); - - /* - * XXX(hch): Updating the ACL entries is not atomic vs the i_mode - * update. We could avoid this with linked transactions - * and passing down the transaction pointer all the way - * to attr_set. No previous user of the generic - * Posix ACL code seems to care about this issue either. - */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { - error = -xfs_acl_chmod(inode); - if (error) - return XFS_ERROR(error); - } - - return 0; - -out_trans_cancel: - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); -out_dqrele: - xfs_qm_dqrele(udqp); - xfs_qm_dqrele(gdqp); - return error; -} - -/* - * Truncate file. Must have write permission and not be a directory. - */ -int -xfs_setattr_size( - struct xfs_inode *ip, - struct iattr *iattr, - int flags) -{ - struct xfs_mount *mp = ip->i_mount; - struct inode *inode = VFS_I(ip); - int mask = iattr->ia_valid; - struct xfs_trans *tp; - int error; - uint lock_flags; - uint commit_flags = 0; - - trace_xfs_setattr(ip); - - if (mp->m_flags & XFS_MOUNT_RDONLY) - return XFS_ERROR(EROFS); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - error = -inode_change_ok(inode, iattr); - if (error) - return XFS_ERROR(error); - - ASSERT(S_ISREG(ip->i_d.di_mode)); - ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| - ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - - lock_flags = XFS_ILOCK_EXCL; - if (!(flags & XFS_ATTR_NOLOCK)) - lock_flags |= XFS_IOLOCK_EXCL; - xfs_ilock(ip, lock_flags); - - /* - * Short circuit the truncate case for zero length files. - */ - if (iattr->ia_size == 0 && - ip->i_size == 0 && ip->i_d.di_nextents == 0) { - if (!(mask & (ATTR_CTIME|ATTR_MTIME))) - goto out_unlock; - - /* - * Use the regular setattr path to update the timestamps. - */ - xfs_iunlock(ip, lock_flags); - iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); - } - - /* - * Make sure that the dquots are attached to the inode. - */ - error = xfs_qm_dqattach_locked(ip, 0); - if (error) - goto out_unlock; - - /* - * Now we can make the changes. Before we join the inode to the - * transaction, take care of the part of the truncation that must be - * done without the inode lock. This needs to be done before joining - * the inode to the transaction, because the inode cannot be unlocked - * once it is a part of the transaction. - */ - if (iattr->ia_size > ip->i_size) { - /* - * Do the first part of growing a file: zero any data in the - * last block that is beyond the old EOF. We need to do this - * before the inode is joined to the transaction to modify - * i_size. - */ - error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); - if (error) - goto out_unlock; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - lock_flags &= ~XFS_ILOCK_EXCL; - - /* - * We are going to log the inode size change in this transaction so - * any previous writes that are beyond the on disk EOF and the new - * EOF that have not been written out need to be written here. If we - * do not write the data out, we expose ourselves to the null files - * problem. - * - * Only flush from the on disk size to the smaller of the in memory - * file size or the new size as that's the range we really care about - * here and prevents waiting for other data not within the range we - * care about here. - */ - if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { - error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, - XBF_ASYNC, FI_NONE); - if (error) - goto out_unlock; - } - - /* - * Wait for all I/O to complete. - */ - xfs_ioend_wait(ip); - - error = -block_truncate_page(inode->i_mapping, iattr->ia_size, - xfs_get_blocks); - if (error) - goto out_unlock; - - tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) - goto out_trans_cancel; - - truncate_setsize(inode, iattr->ia_size); - - commit_flags = XFS_TRANS_RELEASE_LOG_RES; - lock_flags |= XFS_ILOCK_EXCL; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - xfs_trans_ijoin(tp, ip); - - /* - * Only change the c/mtime if we are changing the size or we are - * explicitly asked to change it. This handles the semantic difference - * between truncate() and ftruncate() as implemented in the VFS. - * - * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a - * special case where we need to update the times despite not having - * these flags set. For all other operations the VFS set these flags - * explicitly if it wants a timestamp update. - */ - if (iattr->ia_size != ip->i_size && - (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { - iattr->ia_ctime = iattr->ia_mtime = - current_fs_time(inode->i_sb); - mask |= ATTR_CTIME | ATTR_MTIME; - } - - if (iattr->ia_size > ip->i_size) { - ip->i_d.di_size = iattr->ia_size; - ip->i_size = iattr->ia_size; - } else if (iattr->ia_size <= ip->i_size || - (iattr->ia_size == 0 && ip->i_d.di_nextents)) { - error = xfs_itruncate_data(&tp, ip, iattr->ia_size); - if (error) - goto out_trans_abort; - - /* - * Truncated "down", so we're removing references to old data - * here - if we delay flushing for a long time, we expose - * ourselves unduly to the notorious NULL files problem. So, - * we mark this inode and flush it when the file is closed, - * and do not wait the usual (long) time for writeout. - */ - xfs_iflags_set(ip, XFS_ITRUNCATED); - } - - if (mask & ATTR_CTIME) { - inode->i_ctime = iattr->ia_ctime; - ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; - ip->i_update_core = 1; - } - if (mask & ATTR_MTIME) { - inode->i_mtime = iattr->ia_mtime; - ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; - ip->i_update_core = 1; - } - - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - - XFS_STATS_INC(xs_ig_attrchg); - - if (mp->m_flags & XFS_MOUNT_WSYNC) - xfs_trans_set_sync(tp); - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -out_unlock: - if (lock_flags) - xfs_iunlock(ip, lock_flags); - return error; - -out_trans_abort: - commit_flags |= XFS_TRANS_ABORT; -out_trans_cancel: - xfs_trans_cancel(tp, commit_flags); - goto out_unlock; -} - -STATIC int -xfs_vn_setattr( - struct dentry *dentry, - struct iattr *iattr) -{ - if (iattr->ia_valid & ATTR_SIZE) - return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); - return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); -} - -#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) - -/* - * Call fiemap helper to fill in user data. - * Returns positive errors to xfs_getbmap. - */ -STATIC int -xfs_fiemap_format( - void **arg, - struct getbmapx *bmv, - int *full) -{ - int error; - struct fiemap_extent_info *fieinfo = *arg; - u32 fiemap_flags = 0; - u64 logical, physical, length; - - /* Do nothing for a hole */ - if (bmv->bmv_block == -1LL) - return 0; - - logical = BBTOB(bmv->bmv_offset); - physical = BBTOB(bmv->bmv_block); - length = BBTOB(bmv->bmv_length); - - if (bmv->bmv_oflags & BMV_OF_PREALLOC) - fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; - else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { - fiemap_flags |= FIEMAP_EXTENT_DELALLOC; - physical = 0; /* no block yet */ - } - if (bmv->bmv_oflags & BMV_OF_LAST) - fiemap_flags |= FIEMAP_EXTENT_LAST; - - error = fiemap_fill_next_extent(fieinfo, logical, physical, - length, fiemap_flags); - if (error > 0) { - error = 0; - *full = 1; /* user array now full */ - } - - return -error; -} - -STATIC int -xfs_vn_fiemap( - struct inode *inode, - struct fiemap_extent_info *fieinfo, - u64 start, - u64 length) -{ - xfs_inode_t *ip = XFS_I(inode); - struct getbmapx bm; - int error; - - error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); - if (error) - return error; - - /* Set up bmap header for xfs internal routine */ - bm.bmv_offset = BTOBB(start); - /* Special case for whole file */ - if (length == FIEMAP_MAX_OFFSET) - bm.bmv_length = -1LL; - else - bm.bmv_length = BTOBB(length); - - /* We add one because in getbmap world count includes the header */ - bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : - fieinfo->fi_extents_max + 1; - bm.bmv_count = min_t(__s32, bm.bmv_count, - (PAGE_SIZE * 16 / sizeof(struct getbmapx))); - bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; - if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) - bm.bmv_iflags |= BMV_IF_ATTRFORK; - if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) - bm.bmv_iflags |= BMV_IF_DELALLOC; - - error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); - if (error) - return -error; - - return 0; -} - -static const struct inode_operations xfs_inode_operations = { - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, - .fiemap = xfs_vn_fiemap, -}; - -static const struct inode_operations xfs_dir_inode_operations = { - .create = xfs_vn_create, - .lookup = xfs_vn_lookup, - .link = xfs_vn_link, - .unlink = xfs_vn_unlink, - .symlink = xfs_vn_symlink, - .mkdir = xfs_vn_mkdir, - /* - * Yes, XFS uses the same method for rmdir and unlink. - * - * There are some subtile differences deeper in the code, - * but we use S_ISDIR to check for those. - */ - .rmdir = xfs_vn_unlink, - .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -static const struct inode_operations xfs_dir_ci_inode_operations = { - .create = xfs_vn_create, - .lookup = xfs_vn_ci_lookup, - .link = xfs_vn_link, - .unlink = xfs_vn_unlink, - .symlink = xfs_vn_symlink, - .mkdir = xfs_vn_mkdir, - /* - * Yes, XFS uses the same method for rmdir and unlink. - * - * There are some subtile differences deeper in the code, - * but we use S_ISDIR to check for those. - */ - .rmdir = xfs_vn_unlink, - .mknod = xfs_vn_mknod, - .rename = xfs_vn_rename, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -static const struct inode_operations xfs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = xfs_vn_follow_link, - .put_link = xfs_vn_put_link, - .get_acl = xfs_get_acl, - .getattr = xfs_vn_getattr, - .setattr = xfs_vn_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .removexattr = generic_removexattr, - .listxattr = xfs_vn_listxattr, -}; - -STATIC void -xfs_diflags_to_iflags( - struct inode *inode, - struct xfs_inode *ip) -{ - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; -} - -/* - * Initialize the Linux inode, set up the operation vectors and - * unlock the inode. - * - * When reading existing inodes from disk this is called directly - * from xfs_iget, when creating a new inode it is called from - * xfs_ialloc after setting up the inode. - * - * We are always called with an uninitialised linux inode here. - * We need to initialise the necessary fields and take a reference - * on it. - */ -void -xfs_setup_inode( - struct xfs_inode *ip) -{ - struct inode *inode = &ip->i_vnode; - - inode->i_ino = ip->i_ino; - inode->i_state = I_NEW; - - inode_sb_list_add(inode); - /* make the inode look hashed for the writeback code */ - hlist_add_fake(&inode->i_hash); - - inode->i_mode = ip->i_d.di_mode; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = - MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - inode->i_rdev = 0; - break; - } - - inode->i_generation = ip->i_d.di_gen; - i_size_write(inode, ip->i_d.di_size); - inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; - inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; - inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; - inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; - inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; - xfs_diflags_to_iflags(inode, ip); - - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - inode->i_op = &xfs_inode_operations; - inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - case S_IFDIR: - if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) - inode->i_op = &xfs_dir_ci_inode_operations; - else - inode->i_op = &xfs_dir_inode_operations; - inode->i_fop = &xfs_dir_file_operations; - break; - case S_IFLNK: - inode->i_op = &xfs_symlink_inode_operations; - if (!(ip->i_df.if_flags & XFS_IFINLINE)) - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - default: - inode->i_op = &xfs_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - break; - } - - /* - * If there is no attribute fork no ACL can exist on this inode, - * and it can't have any file capabilities attached to it either. - */ - if (!XFS_IFORK_Q(ip)) { - inode_has_no_xattr(inode); - cache_no_acl(inode); - } - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); -} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h deleted file mode 100644 index ef41c92ce66e..000000000000 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_IOPS_H__ -#define __XFS_IOPS_H__ - -struct xfs_inode; - -extern const struct file_operations xfs_file_operations; -extern const struct file_operations xfs_dir_file_operations; - -extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); - -extern void xfs_setup_inode(struct xfs_inode *); - -#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h deleted file mode 100644 index 1e8a45e74c3e..000000000000 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ /dev/null @@ -1,309 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_LINUX__ -#define __XFS_LINUX__ - -#include - -/* - * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. - */ -#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) -# define XFS_BIG_BLKNOS 1 -# define XFS_BIG_INUMS 1 -#else -# define XFS_BIG_BLKNOS 0 -# define XFS_BIG_INUMS 0 -#endif - -#include "xfs_types.h" - -#include "kmem.h" -#include "mrlock.h" -#include "time.h" -#include "uuid.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "xfs_vnode.h" -#include "xfs_stats.h" -#include "xfs_sysctl.h" -#include "xfs_iops.h" -#include "xfs_aops.h" -#include "xfs_super.h" -#include "xfs_buf.h" -#include "xfs_message.h" - -#ifdef __BIG_ENDIAN -#define XFS_NATIVE_HOST 1 -#else -#undef XFS_NATIVE_HOST -#endif - -/* - * Feature macros (disable/enable) - */ -#ifdef CONFIG_SMP -#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#else -#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ -#endif - -#define irix_sgid_inherit xfs_params.sgid_inherit.val -#define irix_symlink_mode xfs_params.symlink_mode.val -#define xfs_panic_mask xfs_params.panic_mask.val -#define xfs_error_level xfs_params.error_level.val -#define xfs_syncd_centisecs xfs_params.syncd_timer.val -#define xfs_stats_clear xfs_params.stats_clear.val -#define xfs_inherit_sync xfs_params.inherit_sync.val -#define xfs_inherit_nodump xfs_params.inherit_nodump.val -#define xfs_inherit_noatime xfs_params.inherit_noatim.val -#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val -#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val -#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val -#define xfs_rotorstep xfs_params.rotorstep.val -#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val -#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val - -#define current_cpu() (raw_smp_processor_id()) -#define current_pid() (current->pid) -#define current_test_flags(f) (current->flags & (f)) -#define current_set_flags_nested(sp, f) \ - (*(sp) = current->flags, current->flags |= (f)) -#define current_clear_flags_nested(sp, f) \ - (*(sp) = current->flags, current->flags &= ~(f)) -#define current_restore_flags_nested(sp, f) \ - (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) - -#define spinlock_destroy(lock) - -#define NBBY 8 /* number of bits per byte */ - -/* - * Size of block device i/o is parameterized here. - * Currently the system supports page-sized i/o. - */ -#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT -#define BLKDEV_IOSIZE (1<> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - *(__u64 *)a = c; - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - unsigned long __upper, __low, __high, __mod; - __u64 c = *(__u64 *)a; - __upper = __high = c >> 32; - __low = c; - if (__high) { - __upper = __high % (b); - __high = __high / (b); - } - asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); - asm("":"=A" (c):"a" (__low),"d" (__high)); - return __mod; - } - } - - /* NOTREACHED */ - return 0; -} -#else -static inline __u32 xfs_do_div(void *a, __u32 b, int n) -{ - __u32 mod; - - switch (n) { - case 4: - mod = *(__u32 *)a % b; - *(__u32 *)a = *(__u32 *)a / b; - return mod; - case 8: - mod = do_div(*(__u64 *)a, b); - return mod; - } - - /* NOTREACHED */ - return 0; -} - -/* Side effect free 64 bit mod operation */ -static inline __u32 xfs_do_mod(void *a, __u32 b, int n) -{ - switch (n) { - case 4: - return *(__u32 *)a % b; - case 8: - { - __u64 c = *(__u64 *)a; - return do_div(c, b); - } - } - - /* NOTREACHED */ - return 0; -} -#endif - -#undef do_div -#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) -#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) - -static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) -{ - x += y - 1; - do_div(x, y); - return(x * y); -} - -static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) -{ - x += y - 1; - do_div(x, y); - return x; -} - -/* ARM old ABI has some weird alignment/padding */ -#if defined(__arm__) && !defined(__ARM_EABI__) -#define __arch_pack __attribute__((packed)) -#else -#define __arch_pack -#endif - -#define ASSERT_ALWAYS(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef DEBUG -#define ASSERT(expr) ((void)0) - -#ifndef STATIC -# define STATIC static noinline -#endif - -#else /* DEBUG */ - -#define ASSERT(expr) \ - (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) - -#ifndef STATIC -# define STATIC noinline -#endif - -#endif /* DEBUG */ - -#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c deleted file mode 100644 index bd672def95ac..000000000000 --- a/fs/xfs/linux-2.6/xfs_message.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - -/* - * XFS logging functions - */ -static void -__xfs_printk( - const char *level, - const struct xfs_mount *mp, - struct va_format *vaf) -{ - if (mp && mp->m_fsname) { - printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); - return; - } - printk("%sXFS: %pV\n", level, vaf); -} - -#define define_xfs_printk_level(func, kern_level) \ -void func(const struct xfs_mount *mp, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - __xfs_printk(kern_level, mp, &vaf); \ - va_end(args); \ -} \ - -define_xfs_printk_level(xfs_emerg, KERN_EMERG); -define_xfs_printk_level(xfs_alert, KERN_ALERT); -define_xfs_printk_level(xfs_crit, KERN_CRIT); -define_xfs_printk_level(xfs_err, KERN_ERR); -define_xfs_printk_level(xfs_warn, KERN_WARNING); -define_xfs_printk_level(xfs_notice, KERN_NOTICE); -define_xfs_printk_level(xfs_info, KERN_INFO); -#ifdef DEBUG -define_xfs_printk_level(xfs_debug, KERN_DEBUG); -#endif - -void -xfs_alert_tag( - const struct xfs_mount *mp, - int panic_tag, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - int do_panic = 0; - - if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { - xfs_alert(mp, "Transforming an alert into a BUG."); - do_panic = 1; - } - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - __xfs_printk(KERN_ALERT, mp, &vaf); - va_end(args); - - BUG_ON(do_panic); -} - -void -assfail(char *expr, char *file, int line) -{ - xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", - expr, file, line); - BUG(); -} - -void -xfs_hex_dump(void *p, int length) -{ - print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); -} diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h deleted file mode 100644 index 7fb7ea007672..000000000000 --- a/fs/xfs/linux-2.6/xfs_message.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef __XFS_MESSAGE_H -#define __XFS_MESSAGE_H 1 - -struct xfs_mount; - -extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, - const char *fmt, ...) - __attribute__ ((format (printf, 3, 4))); -extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); - -#ifdef DEBUG -extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) - __attribute__ ((format (printf, 2, 3))); -#else -static inline void -__attribute__ ((format (printf, 2, 3))) -xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) -{ -} -#endif - -extern void assfail(char *expr, char *f, int l); - -extern void xfs_hex_dump(void *p, int length); - -#endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c deleted file mode 100644 index 7e76f537abb7..000000000000 --- a/fs/xfs/linux-2.6/xfs_quotaops.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2008, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_sb.h" -#include "xfs_inum.h" -#include "xfs_log.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_quota.h" -#include "xfs_trans.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_qm.h" -#include - - -STATIC int -xfs_quota_type(int type) -{ - switch (type) { - case USRQUOTA: - return XFS_DQ_USER; - case GRPQUOTA: - return XFS_DQ_GROUP; - default: - return XFS_DQ_PROJ; - } -} - -STATIC int -xfs_fs_get_xstate( - struct super_block *sb, - struct fs_quota_stat *fqs) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - return -xfs_qm_scall_getqstat(mp, fqs); -} - -STATIC int -xfs_fs_set_xstate( - struct super_block *sb, - unsigned int uflags, - int op) -{ - struct xfs_mount *mp = XFS_M(sb); - unsigned int flags = 0; - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - - if (uflags & FS_QUOTA_UDQ_ACCT) - flags |= XFS_UQUOTA_ACCT; - if (uflags & FS_QUOTA_PDQ_ACCT) - flags |= XFS_PQUOTA_ACCT; - if (uflags & FS_QUOTA_GDQ_ACCT) - flags |= XFS_GQUOTA_ACCT; - if (uflags & FS_QUOTA_UDQ_ENFD) - flags |= XFS_UQUOTA_ENFD; - if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) - flags |= XFS_OQUOTA_ENFD; - - switch (op) { - case Q_XQUOTAON: - return -xfs_qm_scall_quotaon(mp, flags); - case Q_XQUOTAOFF: - if (!XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_quotaoff(mp, flags); - case Q_XQUOTARM: - if (XFS_IS_QUOTA_ON(mp)) - return -EINVAL; - return -xfs_qm_scall_trunc_qfiles(mp, flags); - } - - return -EINVAL; -} - -STATIC int -xfs_fs_get_dqblk( - struct super_block *sb, - int type, - qid_t id, - struct fs_disk_quota *fdq) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; - - return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); -} - -STATIC int -xfs_fs_set_dqblk( - struct super_block *sb, - int type, - qid_t id, - struct fs_disk_quota *fdq) -{ - struct xfs_mount *mp = XFS_M(sb); - - if (sb->s_flags & MS_RDONLY) - return -EROFS; - if (!XFS_IS_QUOTA_RUNNING(mp)) - return -ENOSYS; - if (!XFS_IS_QUOTA_ON(mp)) - return -ESRCH; - - return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); -} - -const struct quotactl_ops xfs_quotactl_operations = { - .get_xstate = xfs_fs_get_xstate, - .set_xstate = xfs_fs_set_xstate, - .get_dqblk = xfs_fs_get_dqblk, - .set_dqblk = xfs_fs_set_dqblk, -}; diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c deleted file mode 100644 index 76fdc5861932..000000000000 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include - -DEFINE_PER_CPU(struct xfsstats, xfsstats); - -static int xfs_stat_proc_show(struct seq_file *m, void *v) -{ - int c, i, j, val; - __uint64_t xs_xstrat_bytes = 0; - __uint64_t xs_write_bytes = 0; - __uint64_t xs_read_bytes = 0; - - static const struct xstats_entry { - char *desc; - int endpoint; - } xstats[] = { - { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, - { "abt", XFSSTAT_END_ALLOC_BTREE }, - { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, - { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, - { "dir", XFSSTAT_END_DIRECTORY_OPS }, - { "trans", XFSSTAT_END_TRANSACTIONS }, - { "ig", XFSSTAT_END_INODE_OPS }, - { "log", XFSSTAT_END_LOG_OPS }, - { "push_ail", XFSSTAT_END_TAIL_PUSHING }, - { "xstrat", XFSSTAT_END_WRITE_CONVERT }, - { "rw", XFSSTAT_END_READ_WRITE_OPS }, - { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, - { "icluster", XFSSTAT_END_INODE_CLUSTER }, - { "vnodes", XFSSTAT_END_VNODE_OPS }, - { "buf", XFSSTAT_END_BUF }, - { "abtb2", XFSSTAT_END_ABTB_V2 }, - { "abtc2", XFSSTAT_END_ABTC_V2 }, - { "bmbt2", XFSSTAT_END_BMBT_V2 }, - { "ibt2", XFSSTAT_END_IBT_V2 }, - }; - - /* Loop over all stats groups */ - for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { - seq_printf(m, "%s", xstats[i].desc); - /* inner loop does each group */ - while (j < xstats[i].endpoint) { - val = 0; - /* sum over all cpus */ - for_each_possible_cpu(c) - val += *(((__u32*)&per_cpu(xfsstats, c) + j)); - seq_printf(m, " %u", val); - j++; - } - seq_putc(m, '\n'); - } - /* extra precision counters */ - for_each_possible_cpu(i) { - xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; - xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; - xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; - } - - seq_printf(m, "xpc %Lu %Lu %Lu\n", - xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); - seq_printf(m, "debug %u\n", -#if defined(DEBUG) - 1); -#else - 0); -#endif - return 0; -} - -static int xfs_stat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xfs_stat_proc_show, NULL); -} - -static const struct file_operations xfs_stat_proc_fops = { - .owner = THIS_MODULE, - .open = xfs_stat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -int -xfs_init_procfs(void) -{ - if (!proc_mkdir("fs/xfs", NULL)) - goto out; - - if (!proc_create("fs/xfs/stat", 0, NULL, - &xfs_stat_proc_fops)) - goto out_remove_entry; - return 0; - - out_remove_entry: - remove_proc_entry("fs/xfs", NULL); - out: - return -ENOMEM; -} - -void -xfs_cleanup_procfs(void) -{ - remove_proc_entry("fs/xfs/stat", NULL); - remove_proc_entry("fs/xfs", NULL); -} diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h deleted file mode 100644 index 736854b1ca1a..000000000000 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (c) 2000,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_STATS_H__ -#define __XFS_STATS_H__ - - -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - -#include - -/* - * XFS global statistics - */ -struct xfsstats { -# define XFSSTAT_END_EXTENT_ALLOC 4 - __uint32_t xs_allocx; - __uint32_t xs_allocb; - __uint32_t xs_freex; - __uint32_t xs_freeb; -# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) - __uint32_t xs_abt_lookup; - __uint32_t xs_abt_compare; - __uint32_t xs_abt_insrec; - __uint32_t xs_abt_delrec; -# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) - __uint32_t xs_blk_mapr; - __uint32_t xs_blk_mapw; - __uint32_t xs_blk_unmap; - __uint32_t xs_add_exlist; - __uint32_t xs_del_exlist; - __uint32_t xs_look_exlist; - __uint32_t xs_cmp_exlist; -# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) - __uint32_t xs_bmbt_lookup; - __uint32_t xs_bmbt_compare; - __uint32_t xs_bmbt_insrec; - __uint32_t xs_bmbt_delrec; -# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) - __uint32_t xs_dir_lookup; - __uint32_t xs_dir_create; - __uint32_t xs_dir_remove; - __uint32_t xs_dir_getdents; -# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) - __uint32_t xs_trans_sync; - __uint32_t xs_trans_async; - __uint32_t xs_trans_empty; -# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) - __uint32_t xs_ig_attempts; - __uint32_t xs_ig_found; - __uint32_t xs_ig_frecycle; - __uint32_t xs_ig_missed; - __uint32_t xs_ig_dup; - __uint32_t xs_ig_reclaims; - __uint32_t xs_ig_attrchg; -# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) - __uint32_t xs_log_writes; - __uint32_t xs_log_blocks; - __uint32_t xs_log_noiclogs; - __uint32_t xs_log_force; - __uint32_t xs_log_force_sleep; -# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) - __uint32_t xs_try_logspace; - __uint32_t xs_sleep_logspace; - __uint32_t xs_push_ail; - __uint32_t xs_push_ail_success; - __uint32_t xs_push_ail_pushbuf; - __uint32_t xs_push_ail_pinned; - __uint32_t xs_push_ail_locked; - __uint32_t xs_push_ail_flushing; - __uint32_t xs_push_ail_restarts; - __uint32_t xs_push_ail_flush; -# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) - __uint32_t xs_xstrat_quick; - __uint32_t xs_xstrat_split; -# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) - __uint32_t xs_write_calls; - __uint32_t xs_read_calls; -# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) - __uint32_t xs_attr_get; - __uint32_t xs_attr_set; - __uint32_t xs_attr_remove; - __uint32_t xs_attr_list; -# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) - __uint32_t xs_iflush_count; - __uint32_t xs_icluster_flushcnt; - __uint32_t xs_icluster_flushinode; -# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) - __uint32_t vn_active; /* # vnodes not on free lists */ - __uint32_t vn_alloc; /* # times vn_alloc called */ - __uint32_t vn_get; /* # times vn_get called */ - __uint32_t vn_hold; /* # times vn_hold called */ - __uint32_t vn_rele; /* # times vn_rele called */ - __uint32_t vn_reclaim; /* # times vn_reclaim called */ - __uint32_t vn_remove; /* # times vn_remove called */ - __uint32_t vn_free; /* # times vn_free called */ -#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) - __uint32_t xb_get; - __uint32_t xb_create; - __uint32_t xb_get_locked; - __uint32_t xb_get_locked_waited; - __uint32_t xb_busy_locked; - __uint32_t xb_miss_locked; - __uint32_t xb_page_retries; - __uint32_t xb_page_found; - __uint32_t xb_get_read; -/* Version 2 btree counters */ -#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) - __uint32_t xs_abtb_2_lookup; - __uint32_t xs_abtb_2_compare; - __uint32_t xs_abtb_2_insrec; - __uint32_t xs_abtb_2_delrec; - __uint32_t xs_abtb_2_newroot; - __uint32_t xs_abtb_2_killroot; - __uint32_t xs_abtb_2_increment; - __uint32_t xs_abtb_2_decrement; - __uint32_t xs_abtb_2_lshift; - __uint32_t xs_abtb_2_rshift; - __uint32_t xs_abtb_2_split; - __uint32_t xs_abtb_2_join; - __uint32_t xs_abtb_2_alloc; - __uint32_t xs_abtb_2_free; - __uint32_t xs_abtb_2_moves; -#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) - __uint32_t xs_abtc_2_lookup; - __uint32_t xs_abtc_2_compare; - __uint32_t xs_abtc_2_insrec; - __uint32_t xs_abtc_2_delrec; - __uint32_t xs_abtc_2_newroot; - __uint32_t xs_abtc_2_killroot; - __uint32_t xs_abtc_2_increment; - __uint32_t xs_abtc_2_decrement; - __uint32_t xs_abtc_2_lshift; - __uint32_t xs_abtc_2_rshift; - __uint32_t xs_abtc_2_split; - __uint32_t xs_abtc_2_join; - __uint32_t xs_abtc_2_alloc; - __uint32_t xs_abtc_2_free; - __uint32_t xs_abtc_2_moves; -#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) - __uint32_t xs_bmbt_2_lookup; - __uint32_t xs_bmbt_2_compare; - __uint32_t xs_bmbt_2_insrec; - __uint32_t xs_bmbt_2_delrec; - __uint32_t xs_bmbt_2_newroot; - __uint32_t xs_bmbt_2_killroot; - __uint32_t xs_bmbt_2_increment; - __uint32_t xs_bmbt_2_decrement; - __uint32_t xs_bmbt_2_lshift; - __uint32_t xs_bmbt_2_rshift; - __uint32_t xs_bmbt_2_split; - __uint32_t xs_bmbt_2_join; - __uint32_t xs_bmbt_2_alloc; - __uint32_t xs_bmbt_2_free; - __uint32_t xs_bmbt_2_moves; -#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) - __uint32_t xs_ibt_2_lookup; - __uint32_t xs_ibt_2_compare; - __uint32_t xs_ibt_2_insrec; - __uint32_t xs_ibt_2_delrec; - __uint32_t xs_ibt_2_newroot; - __uint32_t xs_ibt_2_killroot; - __uint32_t xs_ibt_2_increment; - __uint32_t xs_ibt_2_decrement; - __uint32_t xs_ibt_2_lshift; - __uint32_t xs_ibt_2_rshift; - __uint32_t xs_ibt_2_split; - __uint32_t xs_ibt_2_join; - __uint32_t xs_ibt_2_alloc; - __uint32_t xs_ibt_2_free; - __uint32_t xs_ibt_2_moves; -/* Extra precision counters */ - __uint64_t xs_xstrat_bytes; - __uint64_t xs_write_bytes; - __uint64_t xs_read_bytes; -}; - -DECLARE_PER_CPU(struct xfsstats, xfsstats); - -/* - * We don't disable preempt, not too worried about poking the - * wrong CPU's stat for now (also aggregated before reporting). - */ -#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) -#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) -#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) - -extern int xfs_init_procfs(void); -extern void xfs_cleanup_procfs(void); - - -#else /* !CONFIG_PROC_FS */ - -# define XFS_STATS_INC(count) -# define XFS_STATS_DEC(count) -# define XFS_STATS_ADD(count, inc) - -static inline int xfs_init_procfs(void) -{ - return 0; -} - -static inline void xfs_cleanup_procfs(void) -{ -} - -#endif /* !CONFIG_PROC_FS */ - -#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c deleted file mode 100644 index 9a72dda58bd0..000000000000 --- a/fs/xfs/linux-2.6/xfs_super.c +++ /dev/null @@ -1,1773 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_fsops.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_vnodeops.h" -#include "xfs_log_priv.h" -#include "xfs_trans_priv.h" -#include "xfs_filestream.h" -#include "xfs_da_btree.h" -#include "xfs_extfree_item.h" -#include "xfs_mru_cache.h" -#include "xfs_inode_item.h" -#include "xfs_sync.h" -#include "xfs_trace.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static const struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_ioend_zone; -mempool_t *xfs_ioend_pool; - -#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ -#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ -#define MNTOPT_LOGDEV "logdev" /* log device */ -#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ -#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ -#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ -#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ -#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ -#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ -#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ -#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ -#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ -#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ -#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ -#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ -#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ -#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ -#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ -#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and - * unwritten extent conversion */ -#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ -#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ -#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ -#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ -#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ -#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes - * in stat(). */ -#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ -#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ -#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ -#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ -#define MNTOPT_NOQUOTA "noquota" /* no quotas */ -#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ -#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ -#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ -#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ -#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ -#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ -#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ -#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ -#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ -#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ -#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ -#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ -#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ -#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ - -/* - * Table driven mount option parser. - * - * Currently only used for remount, but it will be used for mount - * in the future, too. - */ -enum { - Opt_barrier, Opt_nobarrier, Opt_err -}; - -static const match_table_t tokens = { - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_err, NULL} -}; - - -STATIC unsigned long -suffix_strtoul(char *s, char **endp, unsigned int base) -{ - int last, shift_left_factor = 0; - char *value = s; - - last = strlen(value) - 1; - if (value[last] == 'K' || value[last] == 'k') { - shift_left_factor = 10; - value[last] = '\0'; - } - if (value[last] == 'M' || value[last] == 'm') { - shift_left_factor = 20; - value[last] = '\0'; - } - if (value[last] == 'G' || value[last] == 'g') { - shift_left_factor = 30; - value[last] = '\0'; - } - - return simple_strtoul((const char *)s, endp, base) << shift_left_factor; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - * - * Note that this function leaks the various device name allocations on - * failure. The caller takes care of them. - */ -STATIC int -xfs_parseargs( - struct xfs_mount *mp, - char *options) -{ - struct super_block *sb = mp->m_super; - char *this_char, *value, *eov; - int dsunit = 0; - int dswidth = 0; - int iosize = 0; - __uint8_t iosizelog = 0; - - /* - * set up the mount name first so all the errors will refer to the - * correct device. - */ - mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_fsname) - return ENOMEM; - mp->m_fsname_len = strlen(mp->m_fsname) + 1; - - /* - * Copy binary VFS mount flags we are interested in. - */ - if (sb->s_flags & MS_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & MS_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; - - /* - * Set some default flags that could be cleared by the mount option - * parsing. - */ - mp->m_flags |= XFS_MOUNT_BARRIER; - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_flags |= XFS_MOUNT_DELAYLOG; - - /* - * These can be overridden by the mount option parsing. - */ - mp->m_logbufs = -1; - mp->m_logbsize = -1; - - if (!options) - goto done; - - while ((this_char = strsep(&options, ",")) != NULL) { - if (!*this_char) - continue; - if ((value = strchr(this_char, '=')) != NULL) - *value++ = 0; - - if (!strcmp(this_char, MNTOPT_LOGBUFS)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logbufs = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logbsize = suffix_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_logname) - return ENOMEM; - } else if (!strcmp(this_char, MNTOPT_MTPT)) { - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; - } else if (!strcmp(this_char, MNTOPT_RTDEV)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); - if (!mp->m_rtname) - return ENOMEM; - } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - iosize = simple_strtoul(value, &eov, 10); - iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - iosize = suffix_strtoul(value, &eov, 10); - iosizelog = ffs(iosize) - 1; - } else if (!strcmp(this_char, MNTOPT_GRPID) || - !strcmp(this_char, MNTOPT_BSDGROUPS)) { - mp->m_flags |= XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_NOGRPID) || - !strcmp(this_char, MNTOPT_SYSVGROUPS)) { - mp->m_flags &= ~XFS_MOUNT_GRPID; - } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - mp->m_flags |= XFS_MOUNT_WSYNC; - } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - mp->m_flags |= XFS_MOUNT_NOALIGN; - } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - mp->m_flags |= XFS_MOUNT_SWALLOC; - } else if (!strcmp(this_char, MNTOPT_SUNIT)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - dsunit = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { - if (!value || !*value) { - xfs_warn(mp, "%s option requires an argument", - this_char); - return EINVAL; - } - dswidth = simple_strtoul(value, &eov, 10); - } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; -#if !XFS_BIG_INUMS - xfs_warn(mp, "%s option not allowed on this system", - this_char); - return EINVAL; -#endif - } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - mp->m_flags |= XFS_MOUNT_NOUUID; - } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - mp->m_flags |= XFS_MOUNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - mp->m_flags &= ~XFS_MOUNT_BARRIER; - } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - mp->m_flags |= XFS_MOUNT_IKEEP; - } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { - mp->m_flags &= ~XFS_MOUNT_IKEEP; - } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - mp->m_flags |= XFS_MOUNT_ATTR2; - } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - mp->m_flags &= ~XFS_MOUNT_ATTR2; - mp->m_flags |= XFS_MOUNT_NOATTR2; - } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_QUOTA) || - !strcmp(this_char, MNTOPT_UQUOTA) || - !strcmp(this_char, MNTOPT_USRQUOTA)) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || - !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_UQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_PQUOTA) || - !strcmp(this_char, MNTOPT_PRJQUOTA)) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_GQUOTA) || - !strcmp(this_char, MNTOPT_GRPQUOTA)) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_OQUOTA_ENFD); - } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - mp->m_qflags &= ~XFS_OQUOTA_ENFD; - } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { - mp->m_flags |= XFS_MOUNT_DELAYLOG; - } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { - mp->m_flags &= ~XFS_MOUNT_DELAYLOG; - } else if (!strcmp(this_char, MNTOPT_DISCARD)) { - mp->m_flags |= XFS_MOUNT_DISCARD; - } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { - mp->m_flags &= ~XFS_MOUNT_DISCARD; - } else if (!strcmp(this_char, "ihashsize")) { - xfs_warn(mp, - "ihashsize no longer used, option is deprecated."); - } else if (!strcmp(this_char, "osyncisdsync")) { - xfs_warn(mp, - "osyncisdsync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "osyncisosync")) { - xfs_warn(mp, - "osyncisosync has no effect, option is deprecated."); - } else if (!strcmp(this_char, "irixsgid")) { - xfs_warn(mp, - "irixsgid is now a sysctl(2) variable, option is deprecated."); - } else { - xfs_warn(mp, "unknown mount option [%s].", this_char); - return EINVAL; - } - } - - /* - * no recovery flag requires a read-only mount - */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); - return EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { - xfs_warn(mp, - "sunit and swidth options incompatible with the noalign option"); - return EINVAL; - } - - if ((mp->m_flags & XFS_MOUNT_DISCARD) && - !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { - xfs_warn(mp, - "the discard option is incompatible with the nodelaylog option"); - return EINVAL; - } - -#ifndef CONFIG_XFS_QUOTA - if (XFS_IS_QUOTA_RUNNING(mp)) { - xfs_warn(mp, "quota support not available in this kernel."); - return EINVAL; - } -#endif - - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && - (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { - xfs_warn(mp, "cannot mount with both project and group quota"); - return EINVAL; - } - - if ((dsunit && !dswidth) || (!dsunit && dswidth)) { - xfs_warn(mp, "sunit and swidth must be specified together"); - return EINVAL; - } - - if (dsunit && (dswidth % dsunit != 0)) { - xfs_warn(mp, - "stripe width (%d) must be a multiple of the stripe unit (%d)", - dswidth, dsunit); - return EINVAL; - } - -done: - if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - if (dsunit) { - mp->m_dalign = dsunit; - mp->m_flags |= XFS_MOUNT_RETERR; - } - - if (dswidth) - mp->m_swidth = dswidth; - } - - if (mp->m_logbufs != -1 && - mp->m_logbufs != 0 && - (mp->m_logbufs < XLOG_MIN_ICLOGS || - mp->m_logbufs > XLOG_MAX_ICLOGS)) { - xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", - mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - if (mp->m_logbsize != -1 && - mp->m_logbsize != 0 && - (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || - mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(mp->m_logbsize))) { - xfs_warn(mp, - "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - mp->m_logbsize); - return XFS_ERROR(EINVAL); - } - - if (iosizelog) { - if (iosizelog > XFS_MAX_IO_LOG || - iosizelog < XFS_MIN_IO_LOG) { - xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", - iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = iosizelog; - mp->m_writeio_log = iosizelog; - } - - return 0; -} - -struct proc_xfs_info { - int flag; - char *str; -}; - -STATIC int -xfs_showargs( - struct xfs_mount *mp, - struct seq_file *m) -{ - static struct proc_xfs_info xfs_info_set[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, - { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, - { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, - { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, - { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, - { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, - { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, - { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, - { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, - { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, - { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, - { 0, NULL } - }; - static struct proc_xfs_info xfs_info_unset[] = { - /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, - { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, - { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, - { 0, NULL } - }; - struct proc_xfs_info *xfs_infop; - - for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) - seq_puts(m, xfs_infop->str); - } - for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { - if (!(mp->m_flags & xfs_infop->flag)) - seq_puts(m, xfs_infop->str); - } - - if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) - seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", - (int)(1 << mp->m_writeio_log) >> 10); - - if (mp->m_logbufs > 0) - seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); - if (mp->m_logbsize > 0) - seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); - - if (mp->m_logname) - seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); - if (mp->m_rtname) - seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); - - if (mp->m_dalign > 0) - seq_printf(m, "," MNTOPT_SUNIT "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); - if (mp->m_swidth > 0) - seq_printf(m, "," MNTOPT_SWIDTH "=%d", - (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, "," MNTOPT_USRQUOTA); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, "," MNTOPT_UQUOTANOENF); - - /* Either project or group quotas can be active, not both */ - - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_PRJQUOTA); - else - seq_puts(m, "," MNTOPT_PQUOTANOENF); - } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_OQUOTA_ENFD) - seq_puts(m, "," MNTOPT_GRPQUOTA); - else - seq_puts(m, "," MNTOPT_GQUOTANOENF); - } - - if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) - seq_puts(m, "," MNTOPT_NOQUOTA); - - return 0; -} -__uint64_t -xfs_max_file_offset( - unsigned int blockshift) -{ - unsigned int pagefactor = 1; - unsigned int bitshift = BITS_PER_LONG - 1; - - /* Figure out maximum filesize, on Linux this can depend on - * the filesystem blocksize (on 32 bit platforms). - * __block_write_begin does this in an [unsigned] long... - * page->index << (PAGE_CACHE_SHIFT - bbits) - * So, for page sized blocks (4K on 32 bit platforms), - * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is - * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) - * but for smaller blocksizes it is less (bbits = log2 bsize). - * Note1: get_block_t takes a long (implicit cast from above) - * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch - * can optionally convert the [unsigned] long from above into - * an [unsigned] long long. - */ - -#if BITS_PER_LONG == 32 -# if defined(CONFIG_LBDAF) - ASSERT(sizeof(sector_t) == 8); - pagefactor = PAGE_CACHE_SIZE; - bitshift = BITS_PER_LONG; -# else - pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); -# endif -#endif - - return (((__uint64_t)pagefactor) << bitshift) - 1; -} - -STATIC int -xfs_blkdev_get( - xfs_mount_t *mp, - const char *name, - struct block_device **bdevp) -{ - int error = 0; - - *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, - mp); - if (IS_ERR(*bdevp)) { - error = PTR_ERR(*bdevp); - xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); - } - - return -error; -} - -STATIC void -xfs_blkdev_put( - struct block_device *bdev) -{ - if (bdev) - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); -} - -void -xfs_blkdev_issue_flush( - xfs_buftarg_t *buftarg) -{ - blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); -} - -STATIC void -xfs_close_devices( - struct xfs_mount *mp) -{ - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - struct block_device *logdev = mp->m_logdev_targp->bt_bdev; - xfs_free_buftarg(mp, mp->m_logdev_targp); - xfs_blkdev_put(logdev); - } - if (mp->m_rtdev_targp) { - struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; - xfs_free_buftarg(mp, mp->m_rtdev_targp); - xfs_blkdev_put(rtdev); - } - xfs_free_buftarg(mp, mp->m_ddev_targp); -} - -/* - * The file system configurations are: - * (1) device (partition) with data and internal log - * (2) logical volume with data and log subvolumes. - * (3) logical volume with data, log, and realtime subvolumes. - * - * We only have to handle opening the log and realtime volumes here if - * they are present. The data subvolume has already been opened by - * get_sb_bdev() and is stored in sb->s_bdev. - */ -STATIC int -xfs_open_devices( - struct xfs_mount *mp) -{ - struct block_device *ddev = mp->m_super->s_bdev; - struct block_device *logdev = NULL, *rtdev = NULL; - int error; - - /* - * Open real time and log devices - order is important. - */ - if (mp->m_logname) { - error = xfs_blkdev_get(mp, mp->m_logname, &logdev); - if (error) - goto out; - } - - if (mp->m_rtname) { - error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); - if (error) - goto out_close_logdev; - - if (rtdev == ddev || rtdev == logdev) { - xfs_warn(mp, - "Cannot mount filesystem with identical rtdev and ddev/logdev."); - error = EINVAL; - goto out_close_rtdev; - } - } - - /* - * Setup xfs_mount buffer target pointers - */ - error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); - if (!mp->m_ddev_targp) - goto out_close_rtdev; - - if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, - mp->m_fsname); - if (!mp->m_rtdev_targp) - goto out_free_ddev_targ; - } - - if (logdev && logdev != ddev) { - mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, - mp->m_fsname); - if (!mp->m_logdev_targp) - goto out_free_rtdev_targ; - } else { - mp->m_logdev_targp = mp->m_ddev_targp; - } - - return 0; - - out_free_rtdev_targ: - if (mp->m_rtdev_targp) - xfs_free_buftarg(mp, mp->m_rtdev_targp); - out_free_ddev_targ: - xfs_free_buftarg(mp, mp->m_ddev_targp); - out_close_rtdev: - if (rtdev) - xfs_blkdev_put(rtdev); - out_close_logdev: - if (logdev && logdev != ddev) - xfs_blkdev_put(logdev); - out: - return error; -} - -/* - * Setup xfs_mount buffer target pointers based on superblock - */ -STATIC int -xfs_setup_devices( - struct xfs_mount *mp) -{ - int error; - - error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (error) - return error; - - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { - unsigned int log_sector_size = BBSIZE; - - if (xfs_sb_version_hassector(&mp->m_sb)) - log_sector_size = mp->m_sb.sb_logsectsize; - error = xfs_setsize_buftarg(mp->m_logdev_targp, - mp->m_sb.sb_blocksize, - log_sector_size); - if (error) - return error; - } - if (mp->m_rtdev_targp) { - error = xfs_setsize_buftarg(mp->m_rtdev_targp, - mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (error) - return error; - } - - return 0; -} - -/* Catch misguided souls that try to use this interface on XFS */ -STATIC struct inode * -xfs_fs_alloc_inode( - struct super_block *sb) -{ - BUG(); - return NULL; -} - -/* - * Now that the generic code is guaranteed not to be accessing - * the linux inode, we can reclaim the inode. - */ -STATIC void -xfs_fs_destroy_inode( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - - trace_xfs_destroy_inode(ip); - - XFS_STATS_INC(vn_reclaim); - - /* bad inode, get out here ASAP */ - if (is_bad_inode(inode)) - goto out_reclaim; - - xfs_ioend_wait(ip); - - ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); - - /* - * We should never get here with one of the reclaim flags already set. - */ - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); - - /* - * We always use background reclaim here because even if the - * inode is clean, it still may be under IO and hence we have - * to take the flush lock. The background reclaim path handles - * this more efficiently than we can here, so simply let background - * reclaim tear down all inodes. - */ -out_reclaim: - xfs_inode_set_reclaim_tag(ip); -} - -/* - * Slab object creation initialisation for the XFS inode. - * This covers only the idempotent fields in the XFS inode; - * all other fields need to be initialised on allocation - * from the slab. This avoids the need to repeatedly initialise - * fields in the xfs inode that left in the initialise state - * when freeing the inode. - */ -STATIC void -xfs_fs_inode_init_once( - void *inode) -{ - struct xfs_inode *ip = inode; - - memset(ip, 0, sizeof(struct xfs_inode)); - - /* vfs inode */ - inode_init_once(VFS_I(ip)); - - /* xfs inode */ - atomic_set(&ip->i_iocount, 0); - atomic_set(&ip->i_pincount, 0); - spin_lock_init(&ip->i_flags_lock); - init_waitqueue_head(&ip->i_ipin_wait); - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&ip->i_flush); - complete(&ip->i_flush); - - mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, - "xfsino", ip->i_ino); -} - -/* - * Dirty the XFS inode when mark_inode_dirty_sync() is called so that - * we catch unlogged VFS level updates to the inode. - * - * We need the barrier() to maintain correct ordering between unlogged - * updates and the transaction commit code that clears the i_update_core - * field. This requires all updates to be completed before marking the - * inode dirty. - */ -STATIC void -xfs_fs_dirty_inode( - struct inode *inode, - int flags) -{ - barrier(); - XFS_I(inode)->i_update_core = 1; -} - -STATIC int -xfs_log_inode( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_trans *tp; - int error; - - xfs_iunlock(ip, XFS_ILOCK_SHARED); - tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); - error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); - - if (error) { - xfs_trans_cancel(tp, 0); - /* we need to return with the lock hold shared */ - xfs_ilock(ip, XFS_ILOCK_SHARED); - return error; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * Note - it's possible that we might have pushed ourselves out of the - * way during trans_reserve which would flush the inode. But there's - * no guarantee that the inode buffer has actually gone out yet (it's - * delwri). Plus the buffer could be pinned anyway if it's part of - * an inode in another recent transaction. So we play it safe and - * fire off the transaction anyway. - */ - xfs_trans_ijoin(tp, ip); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = xfs_trans_commit(tp, 0); - xfs_ilock_demote(ip, XFS_ILOCK_EXCL); - - return error; -} - -STATIC int -xfs_fs_write_inode( - struct inode *inode, - struct writeback_control *wbc) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - int error = EAGAIN; - - trace_xfs_write_inode(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - if (wbc->sync_mode == WB_SYNC_ALL) { - /* - * Make sure the inode has made it it into the log. Instead - * of forcing it all the way to stable storage using a - * synchronous transaction we let the log force inside the - * ->sync_fs call do that for thus, which reduces the number - * of synchronous log foces dramatically. - */ - xfs_ioend_wait(ip); - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (ip->i_update_core) { - error = xfs_log_inode(ip); - if (error) - goto out_unlock; - } - } else { - /* - * We make this non-blocking if the inode is contended, return - * EAGAIN to indicate to the caller that they did not succeed. - * This prevents the flush path from blocking on inodes inside - * another operation right now, they get caught later by - * xfs_sync. - */ - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) - goto out; - - if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) - goto out_unlock; - - /* - * Now we have the flush lock and the inode is not pinned, we - * can check if the inode is really clean as we know that - * there are no pending transaction completions, it is not - * waiting on the delayed write queue and there is no IO in - * progress. - */ - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - error = 0; - goto out_unlock; - } - error = xfs_iflush(ip, SYNC_TRYLOCK); - } - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - out: - /* - * if we failed to write out the inode then mark - * it dirty again so we'll try again later. - */ - if (error) - xfs_mark_inode_dirty_sync(ip); - return -error; -} - -STATIC void -xfs_fs_evict_inode( - struct inode *inode) -{ - xfs_inode_t *ip = XFS_I(inode); - - trace_xfs_evict_inode(ip); - - truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); - XFS_STATS_DEC(vn_active); - - /* - * The iolock is used by the file system to coordinate reads, - * writes, and block truncates. Up to this point the lock - * protected concurrent accesses by users of the inode. But - * from here forward we're doing some final processing of the - * inode because we're done with it, and although we reuse the - * iolock for protection it is really a distinct lock class - * (in the lockdep sense) from before. To keep lockdep happy - * (and basically indicate what we are doing), we explicitly - * re-init the iolock here. - */ - ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); - mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); - lockdep_set_class_and_name(&ip->i_iolock.mr_lock, - &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); - - xfs_inactive(ip); -} - -STATIC void -xfs_free_fsname( - struct xfs_mount *mp) -{ - kfree(mp->m_fsname); - kfree(mp->m_rtname); - kfree(mp->m_logname); -} - -STATIC void -xfs_fs_put_super( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_syncd_stop(mp); - - /* - * Blow away any referenced inode in the filestreams cache. - * This can and will cause log traffic as inodes go inactive - * here. - */ - xfs_filestream_unmount(mp); - - XFS_bflush(mp->m_ddev_targp); - - xfs_unmountfs(mp); - xfs_freesb(mp); - xfs_icsb_destroy_counters(mp); - xfs_close_devices(mp); - xfs_free_fsname(mp); - kfree(mp); -} - -STATIC int -xfs_fs_sync_fs( - struct super_block *sb, - int wait) -{ - struct xfs_mount *mp = XFS_M(sb); - int error; - - /* - * Not much we can do for the first async pass. Writing out the - * superblock would be counter-productive as we are going to redirty - * when writing out other data and metadata (and writing out a single - * block is quite fast anyway). - * - * Try to asynchronously kick off quota syncing at least. - */ - if (!wait) { - xfs_qm_sync(mp, SYNC_TRYLOCK); - return 0; - } - - error = xfs_quiesce_data(mp); - if (error) - return -error; - - if (laptop_mode) { - /* - * The disk must be active because we're syncing. - * We schedule xfssyncd now (now that the disk is - * active) instead of later (when it might not be). - */ - flush_delayed_work_sync(&mp->m_sync_work); - } - - return 0; -} - -STATIC int -xfs_fs_statfs( - struct dentry *dentry, - struct kstatfs *statp) -{ - struct xfs_mount *mp = XFS_M(dentry->d_sb); - xfs_sb_t *sbp = &mp->m_sb; - struct xfs_inode *ip = XFS_I(dentry->d_inode); - __uint64_t fakeinos, id; - xfs_extlen_t lsize; - __int64_t ffree; - - statp->f_type = XFS_SB_MAGIC; - statp->f_namelen = MAXNAMELEN - 1; - - id = huge_encode_dev(mp->m_ddev_targp->bt_dev); - statp->f_fsid.val[0] = (u32)id; - statp->f_fsid.val[1] = (u32)(id >> 32); - - xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); - - spin_lock(&mp->m_sb_lock); - statp->f_bsize = sbp->sb_blocksize; - lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; - statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = - sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); - fakeinos = statp->f_bfree << sbp->sb_inopblog; - statp->f_files = - MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); - if (mp->m_maxicount) - statp->f_files = min_t(typeof(statp->f_files), - statp->f_files, - mp->m_maxicount); - - /* make sure statp->f_ffree does not underflow */ - ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); - statp->f_ffree = max_t(__int64_t, ffree, 0); - - spin_unlock(&mp->m_sb_lock); - - if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || - ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == - (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) - xfs_qm_statvfs(ip, statp); - return 0; -} - -STATIC void -xfs_save_resvblks(struct xfs_mount *mp) -{ - __uint64_t resblks = 0; - - mp->m_resblks_save = mp->m_resblks; - xfs_reserve_blocks(mp, &resblks, NULL); -} - -STATIC void -xfs_restore_resvblks(struct xfs_mount *mp) -{ - __uint64_t resblks; - - if (mp->m_resblks_save) { - resblks = mp->m_resblks_save; - mp->m_resblks_save = 0; - } else - resblks = xfs_default_resblks(mp); - - xfs_reserve_blocks(mp, &resblks, NULL); -} - -STATIC int -xfs_fs_remount( - struct super_block *sb, - int *flags, - char *options) -{ - struct xfs_mount *mp = XFS_M(sb); - substring_t args[MAX_OPT_ARGS]; - char *p; - int error; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - mp->m_flags |= XFS_MOUNT_BARRIER; - break; - case Opt_nobarrier: - mp->m_flags &= ~XFS_MOUNT_BARRIER; - break; - default: - /* - * Logically we would return an error here to prevent - * users from believing they might have changed - * mount options using remount which can't be changed. - * - * But unfortunately mount(8) adds all options from - * mtab and fstab to the mount arguments in some cases - * so we can't blindly reject options, but have to - * check for each specified option if it actually - * differs from the currently set option and only - * reject it if that's the case. - * - * Until that is implemented we return success for - * every remount request, and silently ignore all - * options that we can't actually change. - */ -#if 0 - xfs_info(mp, - "mount option \"%s\" not supported for remount\n", p); - return -EINVAL; -#else - break; -#endif - } - } - - /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { - mp->m_flags &= ~XFS_MOUNT_RDONLY; - - /* - * If this is the first remount to writeable state we - * might have some superblock changes to update. - */ - if (mp->m_update_flags) { - error = xfs_mount_log_sb(mp, mp->m_update_flags); - if (error) { - xfs_warn(mp, "failed to write sb changes"); - return error; - } - mp->m_update_flags = 0; - } - - /* - * Fill out the reserve pool if it is empty. Use the stashed - * value if it is non-zero, otherwise go with the default. - */ - xfs_restore_resvblks(mp); - } - - /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { - /* - * After we have synced the data but before we sync the - * metadata, we need to free up the reserve block pool so that - * the used block count in the superblock on disk is correct at - * the end of the remount. Stash the current reserve pool size - * so that if we get remounted rw, we can return it to the same - * size. - */ - - xfs_quiesce_data(mp); - xfs_save_resvblks(mp); - xfs_quiesce_attr(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; - } - - return 0; -} - -/* - * Second stage of a freeze. The data is already frozen so we only - * need to take care of the metadata. Once that's done write a dummy - * record to dirty the log in case of a crash while frozen. - */ -STATIC int -xfs_fs_freeze( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_save_resvblks(mp); - xfs_quiesce_attr(mp); - return -xfs_fs_log_dummy(mp); -} - -STATIC int -xfs_fs_unfreeze( - struct super_block *sb) -{ - struct xfs_mount *mp = XFS_M(sb); - - xfs_restore_resvblks(mp); - return 0; -} - -STATIC int -xfs_fs_show_options( - struct seq_file *m, - struct vfsmount *mnt) -{ - return -xfs_showargs(XFS_M(mnt->mnt_sb), m); -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock _has_ now been read in. - */ -STATIC int -xfs_finish_flags( - struct xfs_mount *mp) -{ - int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - - /* Fail a mount where the logbuf is smaller than the log stripe */ - if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if (mp->m_logbsize <= 0 && - mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { - mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (mp->m_logbsize > 0 && - mp->m_logbsize < mp->m_sb.sb_logsunit) { - xfs_warn(mp, - "logbuf size must be greater than or equal to log stripe size"); - return XFS_ERROR(EINVAL); - } - } else { - /* Fail a mount if the logbuf is larger than 32K */ - if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { - xfs_warn(mp, - "logbuf size for version 1 logs must be 16K or 32K"); - return XFS_ERROR(EINVAL); - } - } - - /* - * mkfs'ed attr2 will turn on attr2 mount unless explicitly - * told by noattr2 to turn it off - */ - if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_NOATTR2)) - mp->m_flags |= XFS_MOUNT_ATTR2; - - /* - * prohibit r/w mounts of read-only filesystems - */ - if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { - xfs_warn(mp, - "cannot mount a read-only filesystem as read-write"); - return XFS_ERROR(EROFS); - } - - return 0; -} - -STATIC int -xfs_fs_fill_super( - struct super_block *sb, - void *data, - int silent) -{ - struct inode *root; - struct xfs_mount *mp = NULL; - int flags = 0, error = ENOMEM; - - mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); - if (!mp) - goto out; - - spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - - mp->m_super = sb; - sb->s_fs_info = mp; - - error = xfs_parseargs(mp, (char *)data); - if (error) - goto out_free_fsname; - - sb_min_blocksize(sb, BBSIZE); - sb->s_xattr = xfs_xattr_handlers; - sb->s_export_op = &xfs_export_operations; -#ifdef CONFIG_XFS_QUOTA - sb->s_qcop = &xfs_quotactl_operations; -#endif - sb->s_op = &xfs_super_operations; - - if (silent) - flags |= XFS_MFSI_QUIET; - - error = xfs_open_devices(mp); - if (error) - goto out_free_fsname; - - error = xfs_icsb_init_counters(mp); - if (error) - goto out_close_devices; - - error = xfs_readsb(mp, flags); - if (error) - goto out_destroy_counters; - - error = xfs_finish_flags(mp); - if (error) - goto out_free_sb; - - error = xfs_setup_devices(mp); - if (error) - goto out_free_sb; - - error = xfs_filestream_mount(mp); - if (error) - goto out_free_sb; - - /* - * we must configure the block size in the superblock before we run the - * full mount process as the mount process can lookup and cache inodes. - * For the same reason we must also initialise the syncd and register - * the inode cache shrinker so that inodes can be reclaimed during - * operations like a quotacheck that iterate all inodes in the - * filesystem. - */ - sb->s_magic = XFS_SB_MAGIC; - sb->s_blocksize = mp->m_sb.sb_blocksize; - sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; - sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); - sb->s_time_gran = 1; - set_posix_acl_flag(sb); - - error = xfs_mountfs(mp); - if (error) - goto out_filestream_unmount; - - error = xfs_syncd_init(mp); - if (error) - goto out_unmount; - - root = igrab(VFS_I(mp->m_rootip)); - if (!root) { - error = ENOENT; - goto out_syncd_stop; - } - if (is_bad_inode(root)) { - error = EINVAL; - goto out_syncd_stop; - } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - error = ENOMEM; - goto out_iput; - } - - return 0; - - out_filestream_unmount: - xfs_filestream_unmount(mp); - out_free_sb: - xfs_freesb(mp); - out_destroy_counters: - xfs_icsb_destroy_counters(mp); - out_close_devices: - xfs_close_devices(mp); - out_free_fsname: - xfs_free_fsname(mp); - kfree(mp); - out: - return -error; - - out_iput: - iput(root); - out_syncd_stop: - xfs_syncd_stop(mp); - out_unmount: - /* - * Blow away any referenced inode in the filestreams cache. - * This can and will cause log traffic as inodes go inactive - * here. - */ - xfs_filestream_unmount(mp); - - XFS_bflush(mp->m_ddev_targp); - - xfs_unmountfs(mp); - goto out_free_sb; -} - -STATIC struct dentry * -xfs_fs_mount( - struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) -{ - return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); -} - -static int -xfs_fs_nr_cached_objects( - struct super_block *sb) -{ - return xfs_reclaim_inodes_count(XFS_M(sb)); -} - -static void -xfs_fs_free_cached_objects( - struct super_block *sb, - int nr_to_scan) -{ - xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); -} - -static const struct super_operations xfs_super_operations = { - .alloc_inode = xfs_fs_alloc_inode, - .destroy_inode = xfs_fs_destroy_inode, - .dirty_inode = xfs_fs_dirty_inode, - .write_inode = xfs_fs_write_inode, - .evict_inode = xfs_fs_evict_inode, - .put_super = xfs_fs_put_super, - .sync_fs = xfs_fs_sync_fs, - .freeze_fs = xfs_fs_freeze, - .unfreeze_fs = xfs_fs_unfreeze, - .statfs = xfs_fs_statfs, - .remount_fs = xfs_fs_remount, - .show_options = xfs_fs_show_options, - .nr_cached_objects = xfs_fs_nr_cached_objects, - .free_cached_objects = xfs_fs_free_cached_objects, -}; - -static struct file_system_type xfs_fs_type = { - .owner = THIS_MODULE, - .name = "xfs", - .mount = xfs_fs_mount, - .kill_sb = kill_block_super, - .fs_flags = FS_REQUIRES_DEV, -}; - -STATIC int __init -xfs_init_zones(void) -{ - - xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); - if (!xfs_ioend_zone) - goto out; - - xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, - xfs_ioend_zone); - if (!xfs_ioend_pool) - goto out_destroy_ioend_zone; - - xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), - "xfs_log_ticket"); - if (!xfs_log_ticket_zone) - goto out_destroy_ioend_pool; - - xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), - "xfs_bmap_free_item"); - if (!xfs_bmap_free_item_zone) - goto out_destroy_log_ticket_zone; - - xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), - "xfs_btree_cur"); - if (!xfs_btree_cur_zone) - goto out_destroy_bmap_free_item_zone; - - xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), - "xfs_da_state"); - if (!xfs_da_state_zone) - goto out_destroy_btree_cur_zone; - - xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); - if (!xfs_dabuf_zone) - goto out_destroy_da_state_zone; - - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); - if (!xfs_ifork_zone) - goto out_destroy_dabuf_zone; - - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); - if (!xfs_trans_zone) - goto out_destroy_ifork_zone; - - xfs_log_item_desc_zone = - kmem_zone_init(sizeof(struct xfs_log_item_desc), - "xfs_log_item_desc"); - if (!xfs_log_item_desc_zone) - goto out_destroy_trans_zone; - - /* - * The size of the zone allocated buf log item is the maximum - * size possible under XFS. This wastes a little bit of memory, - * but it is much faster. - */ - xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + - (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / - NBWORD) * sizeof(int))), "xfs_buf_item"); - if (!xfs_buf_item_zone) - goto out_destroy_log_item_desc_zone; - - xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efd_item"); - if (!xfs_efd_zone) - goto out_destroy_buf_item_zone; - - xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), "xfs_efi_item"); - if (!xfs_efi_zone) - goto out_destroy_efd_zone; - - xfs_inode_zone = - kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_inode_zone) - goto out_destroy_efi_zone; - - xfs_ili_zone = - kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", - KM_ZONE_SPREAD, NULL); - if (!xfs_ili_zone) - goto out_destroy_inode_zone; - - return 0; - - out_destroy_inode_zone: - kmem_zone_destroy(xfs_inode_zone); - out_destroy_efi_zone: - kmem_zone_destroy(xfs_efi_zone); - out_destroy_efd_zone: - kmem_zone_destroy(xfs_efd_zone); - out_destroy_buf_item_zone: - kmem_zone_destroy(xfs_buf_item_zone); - out_destroy_log_item_desc_zone: - kmem_zone_destroy(xfs_log_item_desc_zone); - out_destroy_trans_zone: - kmem_zone_destroy(xfs_trans_zone); - out_destroy_ifork_zone: - kmem_zone_destroy(xfs_ifork_zone); - out_destroy_dabuf_zone: - kmem_zone_destroy(xfs_dabuf_zone); - out_destroy_da_state_zone: - kmem_zone_destroy(xfs_da_state_zone); - out_destroy_btree_cur_zone: - kmem_zone_destroy(xfs_btree_cur_zone); - out_destroy_bmap_free_item_zone: - kmem_zone_destroy(xfs_bmap_free_item_zone); - out_destroy_log_ticket_zone: - kmem_zone_destroy(xfs_log_ticket_zone); - out_destroy_ioend_pool: - mempool_destroy(xfs_ioend_pool); - out_destroy_ioend_zone: - kmem_zone_destroy(xfs_ioend_zone); - out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_zones(void) -{ - kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_inode_zone); - kmem_zone_destroy(xfs_efi_zone); - kmem_zone_destroy(xfs_efd_zone); - kmem_zone_destroy(xfs_buf_item_zone); - kmem_zone_destroy(xfs_log_item_desc_zone); - kmem_zone_destroy(xfs_trans_zone); - kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_dabuf_zone); - kmem_zone_destroy(xfs_da_state_zone); - kmem_zone_destroy(xfs_btree_cur_zone); - kmem_zone_destroy(xfs_bmap_free_item_zone); - kmem_zone_destroy(xfs_log_ticket_zone); - mempool_destroy(xfs_ioend_pool); - kmem_zone_destroy(xfs_ioend_zone); - -} - -STATIC int __init -xfs_init_workqueues(void) -{ - /* - * max_active is set to 8 to give enough concurency to allow - * multiple work operations on each CPU to run. This allows multiple - * filesystems to be running sync work concurrently, and scales with - * the number of CPUs in the system. - */ - xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); - if (!xfs_syncd_wq) - goto out; - - xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); - if (!xfs_ail_wq) - goto out_destroy_syncd; - - return 0; - -out_destroy_syncd: - destroy_workqueue(xfs_syncd_wq); -out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_workqueues(void) -{ - destroy_workqueue(xfs_ail_wq); - destroy_workqueue(xfs_syncd_wq); -} - -STATIC int __init -init_xfs_fs(void) -{ - int error; - - printk(KERN_INFO XFS_VERSION_STRING " with " - XFS_BUILD_OPTIONS " enabled\n"); - - xfs_ioend_init(); - xfs_dir_startup(); - - error = xfs_init_zones(); - if (error) - goto out; - - error = xfs_init_workqueues(); - if (error) - goto out_destroy_zones; - - error = xfs_mru_cache_init(); - if (error) - goto out_destroy_wq; - - error = xfs_filestream_init(); - if (error) - goto out_mru_cache_uninit; - - error = xfs_buf_init(); - if (error) - goto out_filestream_uninit; - - error = xfs_init_procfs(); - if (error) - goto out_buf_terminate; - - error = xfs_sysctl_register(); - if (error) - goto out_cleanup_procfs; - - vfs_initquota(); - - error = register_filesystem(&xfs_fs_type); - if (error) - goto out_sysctl_unregister; - return 0; - - out_sysctl_unregister: - xfs_sysctl_unregister(); - out_cleanup_procfs: - xfs_cleanup_procfs(); - out_buf_terminate: - xfs_buf_terminate(); - out_filestream_uninit: - xfs_filestream_uninit(); - out_mru_cache_uninit: - xfs_mru_cache_uninit(); - out_destroy_wq: - xfs_destroy_workqueues(); - out_destroy_zones: - xfs_destroy_zones(); - out: - return error; -} - -STATIC void __exit -exit_xfs_fs(void) -{ - vfs_exitquota(); - unregister_filesystem(&xfs_fs_type); - xfs_sysctl_unregister(); - xfs_cleanup_procfs(); - xfs_buf_terminate(); - xfs_filestream_uninit(); - xfs_mru_cache_uninit(); - xfs_destroy_workqueues(); - xfs_destroy_zones(); -} - -module_init(init_xfs_fs); -module_exit(exit_xfs_fs); - -MODULE_AUTHOR("Silicon Graphics, Inc."); -MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); -MODULE_LICENSE("GPL"); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h deleted file mode 100644 index 50a3266c999e..000000000000 --- a/fs/xfs/linux-2.6/xfs_super.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPER_H__ -#define __XFS_SUPER_H__ - -#include - -#ifdef CONFIG_XFS_QUOTA -extern void xfs_qm_init(void); -extern void xfs_qm_exit(void); -# define vfs_initquota() xfs_qm_init() -# define vfs_exitquota() xfs_qm_exit() -#else -# define vfs_initquota() do { } while (0) -# define vfs_exitquota() do { } while (0) -#endif - -#ifdef CONFIG_XFS_POSIX_ACL -# define XFS_ACL_STRING "ACLs, " -# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) -#else -# define XFS_ACL_STRING -# define set_posix_acl_flag(sb) do { } while (0) -#endif - -#define XFS_SECURITY_STRING "security attributes, " - -#ifdef CONFIG_XFS_RT -# define XFS_REALTIME_STRING "realtime, " -#else -# define XFS_REALTIME_STRING -#endif - -#if XFS_BIG_BLKNOS -# if XFS_BIG_INUMS -# define XFS_BIGFS_STRING "large block/inode numbers, " -# else -# define XFS_BIGFS_STRING "large block numbers, " -# endif -#else -# define XFS_BIGFS_STRING -#endif - -#ifdef DEBUG -# define XFS_DBG_STRING "debug" -#else -# define XFS_DBG_STRING "no debug" -#endif - -#define XFS_VERSION_STRING "SGI XFS" -#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ - XFS_SECURITY_STRING \ - XFS_REALTIME_STRING \ - XFS_BIGFS_STRING \ - XFS_DBG_STRING /* DBG must be last */ - -struct xfs_inode; -struct xfs_mount; -struct xfs_buftarg; -struct block_device; - -extern __uint64_t xfs_max_file_offset(unsigned int); - -extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); - -extern const struct export_operations xfs_export_operations; -extern const struct xattr_handler *xfs_xattr_handlers[]; -extern const struct quotactl_ops xfs_quotactl_operations; - -#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) - -#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c deleted file mode 100644 index 4604f90f86a3..000000000000 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ /dev/null @@ -1,1065 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_dinode.h" -#include "xfs_error.h" -#include "xfs_filestream.h" -#include "xfs_vnodeops.h" -#include "xfs_inode_item.h" -#include "xfs_quota.h" -#include "xfs_trace.h" -#include "xfs_fsops.h" - -#include -#include - -struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ - -/* - * The inode lookup is done in batches to keep the amount of lock traffic and - * radix tree lookups to a minimum. The batch size is a trade off between - * lookup reduction and stack usage. This is in the reclaim path, so we can't - * be too greedy. - */ -#define XFS_LOOKUP_BATCH 32 - -STATIC int -xfs_inode_ag_walk_grab( - struct xfs_inode *ip) -{ - struct inode *inode = VFS_I(ip); - - ASSERT(rcu_read_lock_held()); - - /* - * check for stale RCU freed inode - * - * If the inode has been reallocated, it doesn't matter if it's not in - * the AG we are walking - we are walking for writeback, so if it - * passes all the "valid inode" checks and is dirty, then we'll write - * it back anyway. If it has been reallocated and still being - * initialised, the XFS_INEW check below will catch it. - */ - spin_lock(&ip->i_flags_lock); - if (!ip->i_ino) - goto out_unlock_noent; - - /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ - if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) - goto out_unlock_noent; - spin_unlock(&ip->i_flags_lock); - - /* nothing to sync during shutdown */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return EFSCORRUPTED; - - /* If we can't grab the inode, it must on it's way to reclaim. */ - if (!igrab(inode)) - return ENOENT; - - if (is_bad_inode(inode)) { - IRELE(ip); - return ENOENT; - } - - /* inode is valid */ - return 0; - -out_unlock_noent: - spin_unlock(&ip->i_flags_lock); - return ENOENT; -} - -STATIC int -xfs_inode_ag_walk( - struct xfs_mount *mp, - struct xfs_perag *pag, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags) -{ - uint32_t first_index; - int last_error = 0; - int skipped; - int done; - int nr_found; - -restart: - done = 0; - skipped = 0; - first_index = 0; - nr_found = 0; - do { - struct xfs_inode *batch[XFS_LOOKUP_BATCH]; - int error = 0; - int i; - - rcu_read_lock(); - nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH); - if (!nr_found) { - rcu_read_unlock(); - break; - } - - /* - * Grab the inodes before we drop the lock. if we found - * nothing, nr == 0 and the loop will be skipped. - */ - for (i = 0; i < nr_found; i++) { - struct xfs_inode *ip = batch[i]; - - if (done || xfs_inode_ag_walk_grab(ip)) - batch[i] = NULL; - - /* - * Update the index for the next lookup. Catch - * overflows into the next AG range which can occur if - * we have inodes in the last block of the AG and we - * are currently pointing to the last inode. - * - * Because we may see inodes that are from the wrong AG - * due to RCU freeing and reallocation, only update the - * index if it lies in this AG. It was a race that lead - * us to see this inode, so another lookup from the - * same index will not find it again. - */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) - continue; - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - done = 1; - } - - /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); - - for (i = 0; i < nr_found; i++) { - if (!batch[i]) - continue; - error = execute(batch[i], pag, flags); - IRELE(batch[i]); - if (error == EAGAIN) { - skipped++; - continue; - } - if (error && last_error != EFSCORRUPTED) - last_error = error; - } - - /* bail out if the filesystem is corrupted. */ - if (error == EFSCORRUPTED) - break; - - cond_resched(); - - } while (nr_found && !done); - - if (skipped) { - delay(1); - goto restart; - } - return last_error; -} - -int -xfs_inode_ag_iterator( - struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, - struct xfs_perag *pag, int flags), - int flags) -{ - struct xfs_perag *pag; - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - - ag = 0; - while ((pag = xfs_perag_get(mp, ag))) { - ag = pag->pag_agno + 1; - error = xfs_inode_ag_walk(mp, pag, execute, flags); - xfs_perag_put(pag); - if (error) { - last_error = error; - if (error == EFSCORRUPTED) - break; - } - } - return XFS_ERROR(last_error); -} - -STATIC int -xfs_sync_inode_data( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - struct inode *inode = VFS_I(ip); - struct address_space *mapping = inode->i_mapping; - int error = 0; - - if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) - goto out_wait; - - if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { - if (flags & SYNC_TRYLOCK) - goto out_wait; - xfs_ilock(ip, XFS_IOLOCK_SHARED); - } - - error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? - 0 : XBF_ASYNC, FI_NONE); - xfs_iunlock(ip, XFS_IOLOCK_SHARED); - - out_wait: - if (flags & SYNC_WAIT) - xfs_ioend_wait(ip); - return error; -} - -STATIC int -xfs_sync_inode_attr( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - int error = 0; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - if (xfs_inode_clean(ip)) - goto out_unlock; - if (!xfs_iflock_nowait(ip)) { - if (!(flags & SYNC_WAIT)) - goto out_unlock; - xfs_iflock(ip); - } - - if (xfs_inode_clean(ip)) { - xfs_ifunlock(ip); - goto out_unlock; - } - - error = xfs_iflush(ip, flags); - - /* - * We don't want to try again on non-blocking flushes that can't run - * again immediately. If an inode really must be written, then that's - * what the SYNC_WAIT flag is for. - */ - if (error == EAGAIN) { - ASSERT(!(flags & SYNC_WAIT)); - error = 0; - } - - out_unlock: - xfs_iunlock(ip, XFS_ILOCK_SHARED); - return error; -} - -/* - * Write out pagecache data for the whole filesystem. - */ -STATIC int -xfs_sync_data( - struct xfs_mount *mp, - int flags) -{ - int error; - - ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); - - error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); - if (error) - return XFS_ERROR(error); - - xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); - return 0; -} - -/* - * Write out inode metadata (attributes) for the whole filesystem. - */ -STATIC int -xfs_sync_attr( - struct xfs_mount *mp, - int flags) -{ - ASSERT((flags & ~SYNC_WAIT) == 0); - - return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); -} - -STATIC int -xfs_sync_fsdata( - struct xfs_mount *mp) -{ - struct xfs_buf *bp; - - /* - * If the buffer is pinned then push on the log so we won't get stuck - * waiting in the write for someone, maybe ourselves, to flush the log. - * - * Even though we just pushed the log above, we did not have the - * superblock buffer locked at that point so it can become pinned in - * between there and here. - */ - bp = xfs_getsb(mp, 0); - if (xfs_buf_ispinned(bp)) - xfs_log_force(mp, 0); - - return xfs_bwrite(mp, bp); -} - -/* - * When remounting a filesystem read-only or freezing the filesystem, we have - * two phases to execute. This first phase is syncing the data before we - * quiesce the filesystem, and the second is flushing all the inodes out after - * we've waited for all the transactions created by the first phase to - * complete. The second phase ensures that the inodes are written to their - * location on disk rather than just existing in transactions in the log. This - * means after a quiesce there is no log replay required to write the inodes to - * disk (this is the main difference between a sync and a quiesce). - */ -/* - * First stage of freeze - no writers will make progress now we are here, - * so we flush delwri and delalloc buffers here, then wait for all I/O to - * complete. Data is frozen at that point. Metadata is not frozen, - * transactions can still occur here so don't bother flushing the buftarg - * because it'll just get dirty again. - */ -int -xfs_quiesce_data( - struct xfs_mount *mp) -{ - int error, error2 = 0; - - xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_qm_sync(mp, SYNC_WAIT); - - /* force out the newly dirtied log buffers */ - xfs_log_force(mp, XFS_LOG_SYNC); - - /* write superblock and hoover up shutdown errors */ - error = xfs_sync_fsdata(mp); - - /* make sure all delwri buffers are written out */ - xfs_flush_buftarg(mp->m_ddev_targp, 1); - - /* mark the log as covered if needed */ - if (xfs_log_need_covered(mp)) - error2 = xfs_fs_log_dummy(mp); - - /* flush data-only devices */ - if (mp->m_rtdev_targp) - XFS_bflush(mp->m_rtdev_targp); - - return error ? error : error2; -} - -STATIC void -xfs_quiesce_fs( - struct xfs_mount *mp) -{ - int count = 0, pincount; - - xfs_reclaim_inodes(mp, 0); - xfs_flush_buftarg(mp->m_ddev_targp, 0); - - /* - * This loop must run at least twice. The first instance of the loop - * will flush most meta data but that will generate more meta data - * (typically directory updates). Which then must be flushed and - * logged before we can write the unmount record. We also so sync - * reclaim of inodes to catch any that the above delwri flush skipped. - */ - do { - xfs_reclaim_inodes(mp, SYNC_WAIT); - xfs_sync_attr(mp, SYNC_WAIT); - pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); - if (!pincount) { - delay(50); - count++; - } - } while (count < 2); -} - -/* - * Second stage of a quiesce. The data is already synced, now we have to take - * care of the metadata. New transactions are already blocked, so we need to - * wait for any remaining transactions to drain out before proceeding. - */ -void -xfs_quiesce_attr( - struct xfs_mount *mp) -{ - int error = 0; - - /* wait for all modifications to complete */ - while (atomic_read(&mp->m_active_trans) > 0) - delay(100); - - /* flush inodes and push all remaining buffers out to disk */ - xfs_quiesce_fs(mp); - - /* - * Just warn here till VFS can correctly support - * read-only remount without racing. - */ - WARN_ON(atomic_read(&mp->m_active_trans) != 0); - - /* Push the superblock and write an unmount record */ - error = xfs_log_sbcount(mp); - if (error) - xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " - "Frozen image may not be consistent."); - xfs_log_unmount_write(mp); - xfs_unmountfs_writesb(mp); -} - -static void -xfs_syncd_queue_sync( - struct xfs_mount *mp) -{ - queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, - msecs_to_jiffies(xfs_syncd_centisecs * 10)); -} - -/* - * Every sync period we need to unpin all items, reclaim inodes and sync - * disk quotas. We might need to cover the log to indicate that the - * filesystem is idle and not frozen. - */ -STATIC void -xfs_sync_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_sync_work); - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - /* dgc: errors ignored here */ - if (mp->m_super->s_frozen == SB_UNFROZEN && - xfs_log_need_covered(mp)) - error = xfs_fs_log_dummy(mp); - else - xfs_log_force(mp, 0); - error = xfs_qm_sync(mp, SYNC_TRYLOCK); - - /* start pushing all the metadata that is currently dirty */ - xfs_ail_push_all(mp->m_ail); - } - - /* queue us up again */ - xfs_syncd_queue_sync(mp); -} - -/* - * Queue a new inode reclaim pass if there are reclaimable inodes and there - * isn't a reclaim pass already in progress. By default it runs every 5s based - * on the xfs syncd work default of 30s. Perhaps this should have it's own - * tunable, but that can be done if this method proves to be ineffective or too - * aggressive. - */ -static void -xfs_syncd_queue_reclaim( - struct xfs_mount *mp) -{ - - /* - * We can have inodes enter reclaim after we've shut down the syncd - * workqueue during unmount, so don't allow reclaim work to be queued - * during unmount. - */ - if (!(mp->m_super->s_flags & MS_ACTIVE)) - return; - - rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { - queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, - msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); - } - rcu_read_unlock(); -} - -/* - * This is a fast pass over the inode cache to try to get reclaim moving on as - * many inodes as possible in a short period of time. It kicks itself every few - * seconds, as well as being kicked by the inode cache shrinker when memory - * goes low. It scans as quickly as possible avoiding locked inodes or those - * already being flushed, and once done schedules a future pass. - */ -STATIC void -xfs_reclaim_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_reclaim_work); - - xfs_reclaim_inodes(mp, SYNC_TRYLOCK); - xfs_syncd_queue_reclaim(mp); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room. - * - * Queue a new data flush if there isn't one already in progress and - * wait for completion of the flush. This means that we only ever have one - * inode flush in progress no matter how many ENOSPC events are occurring and - * so will prevent the system from bogging down due to every concurrent - * ENOSPC event scanning all the active inodes in the system for writeback. - */ -void -xfs_flush_inodes( - struct xfs_inode *ip) -{ - struct xfs_mount *mp = ip->i_mount; - - queue_work(xfs_syncd_wq, &mp->m_flush_work); - flush_work_sync(&mp->m_flush_work); -} - -STATIC void -xfs_flush_worker( - struct work_struct *work) -{ - struct xfs_mount *mp = container_of(work, - struct xfs_mount, m_flush_work); - - xfs_sync_data(mp, SYNC_TRYLOCK); - xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); -} - -int -xfs_syncd_init( - struct xfs_mount *mp) -{ - INIT_WORK(&mp->m_flush_work, xfs_flush_worker); - INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); - INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); - - xfs_syncd_queue_sync(mp); - xfs_syncd_queue_reclaim(mp); - - return 0; -} - -void -xfs_syncd_stop( - struct xfs_mount *mp) -{ - cancel_delayed_work_sync(&mp->m_sync_work); - cancel_delayed_work_sync(&mp->m_reclaim_work); - cancel_work_sync(&mp->m_flush_work); -} - -void -__xfs_inode_set_reclaim_tag( - struct xfs_perag *pag, - struct xfs_inode *ip) -{ - radix_tree_tag_set(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - - if (!pag->pag_ici_reclaimable) { - /* propagate the reclaim tag up into the perag radix tree */ - spin_lock(&ip->i_mount->m_perag_lock); - radix_tree_tag_set(&ip->i_mount->m_perag_tree, - XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - spin_unlock(&ip->i_mount->m_perag_lock); - - /* schedule periodic background inode reclaim */ - xfs_syncd_queue_reclaim(ip->i_mount); - - trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); - } - pag->pag_ici_reclaimable++; -} - -/* - * We set the inode flag atomically with the radix tree tag. - * Once we get tag lookups on the radix tree, this inode flag - * can go away. - */ -void -xfs_inode_set_reclaim_tag( - xfs_inode_t *ip) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); - spin_lock(&pag->pag_ici_lock); - spin_lock(&ip->i_flags_lock); - __xfs_inode_set_reclaim_tag(pag, ip); - __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - spin_unlock(&ip->i_flags_lock); - spin_unlock(&pag->pag_ici_lock); - xfs_perag_put(pag); -} - -STATIC void -__xfs_inode_clear_reclaim( - xfs_perag_t *pag, - xfs_inode_t *ip) -{ - pag->pag_ici_reclaimable--; - if (!pag->pag_ici_reclaimable) { - /* clear the reclaim tag from the perag radix tree */ - spin_lock(&ip->i_mount->m_perag_lock); - radix_tree_tag_clear(&ip->i_mount->m_perag_tree, - XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), - XFS_ICI_RECLAIM_TAG); - spin_unlock(&ip->i_mount->m_perag_lock); - trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, - -1, _RET_IP_); - } -} - -void -__xfs_inode_clear_reclaim_tag( - xfs_mount_t *mp, - xfs_perag_t *pag, - xfs_inode_t *ip) -{ - radix_tree_tag_clear(&pag->pag_ici_root, - XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); - __xfs_inode_clear_reclaim(pag, ip); -} - -/* - * Grab the inode for reclaim exclusively. - * Return 0 if we grabbed it, non-zero otherwise. - */ -STATIC int -xfs_reclaim_inode_grab( - struct xfs_inode *ip, - int flags) -{ - ASSERT(rcu_read_lock_held()); - - /* quick check for stale RCU freed inode */ - if (!ip->i_ino) - return 1; - - /* - * do some unlocked checks first to avoid unnecessary lock traffic. - * The first is a flush lock check, the second is a already in reclaim - * check. Only do these checks if we are not going to block on locks. - */ - if ((flags & SYNC_TRYLOCK) && - (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { - return 1; - } - - /* - * The radix tree lock here protects a thread in xfs_iget from racing - * with us starting reclaim on the inode. Once we have the - * XFS_IRECLAIM flag set it will not touch us. - * - * Due to RCU lookup, we may find inodes that have been freed and only - * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that - * aren't candidates for reclaim at all, so we must check the - * XFS_IRECLAIMABLE is set first before proceeding to reclaim. - */ - spin_lock(&ip->i_flags_lock); - if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || - __xfs_iflags_test(ip, XFS_IRECLAIM)) { - /* not a reclaim candidate. */ - spin_unlock(&ip->i_flags_lock); - return 1; - } - __xfs_iflags_set(ip, XFS_IRECLAIM); - spin_unlock(&ip->i_flags_lock); - return 0; -} - -/* - * Inodes in different states need to be treated differently, and the return - * value of xfs_iflush is not sufficient to get this right. The following table - * lists the inode states and the reclaim actions necessary for non-blocking - * reclaim: - * - * - * inode state iflush ret required action - * --------------- ---------- --------------- - * bad - reclaim - * shutdown EIO unpin and reclaim - * clean, unpinned 0 reclaim - * stale, unpinned 0 reclaim - * clean, pinned(*) 0 requeue - * stale, pinned EAGAIN requeue - * dirty, delwri ok 0 requeue - * dirty, delwri blocked EAGAIN requeue - * dirty, sync flush 0 reclaim - * - * (*) dgc: I don't think the clean, pinned state is possible but it gets - * handled anyway given the order of checks implemented. - * - * As can be seen from the table, the return value of xfs_iflush() is not - * sufficient to correctly decide the reclaim action here. The checks in - * xfs_iflush() might look like duplicates, but they are not. - * - * Also, because we get the flush lock first, we know that any inode that has - * been flushed delwri has had the flush completed by the time we check that - * the inode is clean. The clean inode check needs to be done before flushing - * the inode delwri otherwise we would loop forever requeuing clean inodes as - * we cannot tell apart a successful delwri flush and a clean inode from the - * return value of xfs_iflush(). - * - * Note that because the inode is flushed delayed write by background - * writeback, the flush lock may already be held here and waiting on it can - * result in very long latencies. Hence for sync reclaims, where we wait on the - * flush lock, the caller should push out delayed write inodes first before - * trying to reclaim them to minimise the amount of time spent waiting. For - * background relaim, we just requeue the inode for the next pass. - * - * Hence the order of actions after gaining the locks should be: - * bad => reclaim - * shutdown => unpin and reclaim - * pinned, delwri => requeue - * pinned, sync => unpin - * stale => reclaim - * clean => reclaim - * dirty, delwri => flush and requeue - * dirty, sync => flush, wait and reclaim - */ -STATIC int -xfs_reclaim_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int sync_mode) -{ - int error; - -restart: - error = 0; - xfs_ilock(ip, XFS_ILOCK_EXCL); - if (!xfs_iflock_nowait(ip)) { - if (!(sync_mode & SYNC_WAIT)) - goto out; - xfs_iflock(ip); - } - - if (is_bad_inode(VFS_I(ip))) - goto reclaim; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_iunpin_wait(ip); - goto reclaim; - } - if (xfs_ipincount(ip)) { - if (!(sync_mode & SYNC_WAIT)) { - xfs_ifunlock(ip); - goto out; - } - xfs_iunpin_wait(ip); - } - if (xfs_iflags_test(ip, XFS_ISTALE)) - goto reclaim; - if (xfs_inode_clean(ip)) - goto reclaim; - - /* - * Now we have an inode that needs flushing. - * - * We do a nonblocking flush here even if we are doing a SYNC_WAIT - * reclaim as we can deadlock with inode cluster removal. - * xfs_ifree_cluster() can lock the inode buffer before it locks the - * ip->i_lock, and we are doing the exact opposite here. As a result, - * doing a blocking xfs_itobp() to get the cluster buffer will result - * in an ABBA deadlock with xfs_ifree_cluster(). - * - * As xfs_ifree_cluser() must gather all inodes that are active in the - * cache to mark them stale, if we hit this case we don't actually want - * to do IO here - we want the inode marked stale so we can simply - * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, - * just unlock the inode, back off and try again. Hopefully the next - * pass through will see the stale flag set on the inode. - */ - error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); - if (sync_mode & SYNC_WAIT) { - if (error == EAGAIN) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* backoff longer than in xfs_ifree_cluster */ - delay(2); - goto restart; - } - xfs_iflock(ip); - goto reclaim; - } - - /* - * When we have to flush an inode but don't have SYNC_WAIT set, we - * flush the inode out using a delwri buffer and wait for the next - * call into reclaim to find it in a clean state instead of waiting for - * it now. We also don't return errors here - if the error is transient - * then the next reclaim pass will flush the inode, and if the error - * is permanent then the next sync reclaim will reclaim the inode and - * pass on the error. - */ - if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { - xfs_warn(ip->i_mount, - "inode 0x%llx background reclaim flush failed with %d", - (long long)ip->i_ino, error); - } -out: - xfs_iflags_clear(ip, XFS_IRECLAIM); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * We could return EAGAIN here to make reclaim rescan the inode tree in - * a short while. However, this just burns CPU time scanning the tree - * waiting for IO to complete and xfssyncd never goes back to the idle - * state. Instead, return 0 to let the next scheduled background reclaim - * attempt to reclaim the inode again. - */ - return 0; - -reclaim: - xfs_ifunlock(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - XFS_STATS_INC(xs_ig_reclaims); - /* - * Remove the inode from the per-AG radix tree. - * - * Because radix_tree_delete won't complain even if the item was never - * added to the tree assert that it's been there before to catch - * problems with the inode life time early on. - */ - spin_lock(&pag->pag_ici_lock); - if (!radix_tree_delete(&pag->pag_ici_root, - XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) - ASSERT(0); - __xfs_inode_clear_reclaim(pag, ip); - spin_unlock(&pag->pag_ici_lock); - - /* - * Here we do an (almost) spurious inode lock in order to coordinate - * with inode cache radix tree lookups. This is because the lookup - * can reference the inodes in the cache without taking references. - * - * We make that OK here by ensuring that we wait until the inode is - * unlocked after the lookup before we go ahead and free it. We get - * both the ilock and the iolock because the code may need to drop the - * ilock one but will still hold the iolock. - */ - xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - xfs_qm_dqdetach(ip); - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); - - xfs_inode_free(ip); - return error; - -} - -/* - * Walk the AGs and reclaim the inodes in them. Even if the filesystem is - * corrupted, we still want to try to reclaim all the inodes. If we don't, - * then a shut down during filesystem unmount reclaim walk leak all the - * unreclaimed inodes. - */ -int -xfs_reclaim_inodes_ag( - struct xfs_mount *mp, - int flags, - int *nr_to_scan) -{ - struct xfs_perag *pag; - int error = 0; - int last_error = 0; - xfs_agnumber_t ag; - int trylock = flags & SYNC_TRYLOCK; - int skipped; - -restart: - ag = 0; - skipped = 0; - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - unsigned long first_index = 0; - int done = 0; - int nr_found = 0; - - ag = pag->pag_agno + 1; - - if (trylock) { - if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { - skipped++; - xfs_perag_put(pag); - continue; - } - first_index = pag->pag_ici_reclaim_cursor; - } else - mutex_lock(&pag->pag_ici_reclaim_lock); - - do { - struct xfs_inode *batch[XFS_LOOKUP_BATCH]; - int i; - - rcu_read_lock(); - nr_found = radix_tree_gang_lookup_tag( - &pag->pag_ici_root, - (void **)batch, first_index, - XFS_LOOKUP_BATCH, - XFS_ICI_RECLAIM_TAG); - if (!nr_found) { - done = 1; - rcu_read_unlock(); - break; - } - - /* - * Grab the inodes before we drop the lock. if we found - * nothing, nr == 0 and the loop will be skipped. - */ - for (i = 0; i < nr_found; i++) { - struct xfs_inode *ip = batch[i]; - - if (done || xfs_reclaim_inode_grab(ip, flags)) - batch[i] = NULL; - - /* - * Update the index for the next lookup. Catch - * overflows into the next AG range which can - * occur if we have inodes in the last block of - * the AG and we are currently pointing to the - * last inode. - * - * Because we may see inodes that are from the - * wrong AG due to RCU freeing and - * reallocation, only update the index if it - * lies in this AG. It was a race that lead us - * to see this inode, so another lookup from - * the same index will not find it again. - */ - if (XFS_INO_TO_AGNO(mp, ip->i_ino) != - pag->pag_agno) - continue; - first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); - if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) - done = 1; - } - - /* unlock now we've grabbed the inodes. */ - rcu_read_unlock(); - - for (i = 0; i < nr_found; i++) { - if (!batch[i]) - continue; - error = xfs_reclaim_inode(batch[i], pag, flags); - if (error && last_error != EFSCORRUPTED) - last_error = error; - } - - *nr_to_scan -= XFS_LOOKUP_BATCH; - - cond_resched(); - - } while (nr_found && !done && *nr_to_scan > 0); - - if (trylock && !done) - pag->pag_ici_reclaim_cursor = first_index; - else - pag->pag_ici_reclaim_cursor = 0; - mutex_unlock(&pag->pag_ici_reclaim_lock); - xfs_perag_put(pag); - } - - /* - * if we skipped any AG, and we still have scan count remaining, do - * another pass this time using blocking reclaim semantics (i.e - * waiting on the reclaim locks and ignoring the reclaim cursors). This - * ensure that when we get more reclaimers than AGs we block rather - * than spin trying to execute reclaim. - */ - if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { - trylock = 0; - goto restart; - } - return XFS_ERROR(last_error); -} - -int -xfs_reclaim_inodes( - xfs_mount_t *mp, - int mode) -{ - int nr_to_scan = INT_MAX; - - return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); -} - -/* - * Scan a certain number of inodes for reclaim. - * - * When called we make sure that there is a background (fast) inode reclaim in - * progress, while we will throttle the speed of reclaim via doing synchronous - * reclaim of inodes. That means if we come across dirty inodes, we wait for - * them to be cleaned, which we hope will not be very long due to the - * background walker having already kicked the IO off on those dirty inodes. - */ -void -xfs_reclaim_inodes_nr( - struct xfs_mount *mp, - int nr_to_scan) -{ - /* kick background reclaimer and push the AIL */ - xfs_syncd_queue_reclaim(mp); - xfs_ail_push_all(mp->m_ail); - - xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); -} - -/* - * Return the number of reclaimable inodes in the filesystem for - * the shrinker to determine how much to reclaim. - */ -int -xfs_reclaim_inodes_count( - struct xfs_mount *mp) -{ - struct xfs_perag *pag; - xfs_agnumber_t ag = 0; - int reclaimable = 0; - - while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { - ag = pag->pag_agno + 1; - reclaimable += pag->pag_ici_reclaimable; - xfs_perag_put(pag); - } - return reclaimable; -} - diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h deleted file mode 100644 index 941202e7ac6e..000000000000 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef XFS_SYNC_H -#define XFS_SYNC_H 1 - -struct xfs_mount; -struct xfs_perag; - -#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ -#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ - -extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ - -int xfs_syncd_init(struct xfs_mount *mp); -void xfs_syncd_stop(struct xfs_mount *mp); - -int xfs_quiesce_data(struct xfs_mount *mp); -void xfs_quiesce_attr(struct xfs_mount *mp); - -void xfs_flush_inodes(struct xfs_inode *ip); - -int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); -int xfs_reclaim_inodes_count(struct xfs_mount *mp); -void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); - -void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); -void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); -void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, - struct xfs_inode *ip); - -int xfs_sync_inode_grab(struct xfs_inode *ip); -int xfs_inode_ag_iterator(struct xfs_mount *mp, - int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), - int flags); - -#endif diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c deleted file mode 100644 index ee2d2adaa438..000000000000 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright (c) 2001-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include -#include -#include "xfs_error.h" - -static struct ctl_table_header *xfs_table_header; - -#ifdef CONFIG_PROC_FS -STATIC int -xfs_stats_clear_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) -{ - int c, ret, *valp = ctl->data; - __uint32_t vn_active; - - ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - - if (!ret && write && *valp) { - xfs_notice(NULL, "Clearing xfsstats"); - for_each_possible_cpu(c) { - preempt_disable(); - /* save vn_active, it's a universal truth! */ - vn_active = per_cpu(xfsstats, c).vn_active; - memset(&per_cpu(xfsstats, c), 0, - sizeof(struct xfsstats)); - per_cpu(xfsstats, c).vn_active = vn_active; - preempt_enable(); - } - xfs_stats_clear = 0; - } - - return ret; -} - -STATIC int -xfs_panic_mask_proc_handler( - ctl_table *ctl, - int write, - void __user *buffer, - size_t *lenp, - loff_t *ppos) -{ - int ret, *valp = ctl->data; - - ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); - if (!ret && write) { - xfs_panic_mask = *valp; -#ifdef DEBUG - xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); -#endif - } - return ret; -} -#endif /* CONFIG_PROC_FS */ - -static ctl_table xfs_table[] = { - { - .procname = "irix_sgid_inherit", - .data = &xfs_params.sgid_inherit.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.sgid_inherit.min, - .extra2 = &xfs_params.sgid_inherit.max - }, - { - .procname = "irix_symlink_mode", - .data = &xfs_params.symlink_mode.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.symlink_mode.min, - .extra2 = &xfs_params.symlink_mode.max - }, - { - .procname = "panic_mask", - .data = &xfs_params.panic_mask.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = xfs_panic_mask_proc_handler, - .extra1 = &xfs_params.panic_mask.min, - .extra2 = &xfs_params.panic_mask.max - }, - - { - .procname = "error_level", - .data = &xfs_params.error_level.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.error_level.min, - .extra2 = &xfs_params.error_level.max - }, - { - .procname = "xfssyncd_centisecs", - .data = &xfs_params.syncd_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.syncd_timer.min, - .extra2 = &xfs_params.syncd_timer.max - }, - { - .procname = "inherit_sync", - .data = &xfs_params.inherit_sync.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_sync.min, - .extra2 = &xfs_params.inherit_sync.max - }, - { - .procname = "inherit_nodump", - .data = &xfs_params.inherit_nodump.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nodump.min, - .extra2 = &xfs_params.inherit_nodump.max - }, - { - .procname = "inherit_noatime", - .data = &xfs_params.inherit_noatim.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_noatim.min, - .extra2 = &xfs_params.inherit_noatim.max - }, - { - .procname = "xfsbufd_centisecs", - .data = &xfs_params.xfs_buf_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_timer.min, - .extra2 = &xfs_params.xfs_buf_timer.max - }, - { - .procname = "age_buffer_centisecs", - .data = &xfs_params.xfs_buf_age.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.xfs_buf_age.min, - .extra2 = &xfs_params.xfs_buf_age.max - }, - { - .procname = "inherit_nosymlinks", - .data = &xfs_params.inherit_nosym.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nosym.min, - .extra2 = &xfs_params.inherit_nosym.max - }, - { - .procname = "rotorstep", - .data = &xfs_params.rotorstep.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.rotorstep.min, - .extra2 = &xfs_params.rotorstep.max - }, - { - .procname = "inherit_nodefrag", - .data = &xfs_params.inherit_nodfrg.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.inherit_nodfrg.min, - .extra2 = &xfs_params.inherit_nodfrg.max - }, - { - .procname = "filestream_centisecs", - .data = &xfs_params.fstrm_timer.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &xfs_params.fstrm_timer.min, - .extra2 = &xfs_params.fstrm_timer.max, - }, - /* please keep this the last entry */ -#ifdef CONFIG_PROC_FS - { - .procname = "stats_clear", - .data = &xfs_params.stats_clear.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = xfs_stats_clear_proc_handler, - .extra1 = &xfs_params.stats_clear.min, - .extra2 = &xfs_params.stats_clear.max - }, -#endif /* CONFIG_PROC_FS */ - - {} -}; - -static ctl_table xfs_dir_table[] = { - { - .procname = "xfs", - .mode = 0555, - .child = xfs_table - }, - {} -}; - -static ctl_table xfs_root_table[] = { - { - .procname = "fs", - .mode = 0555, - .child = xfs_dir_table - }, - {} -}; - -int -xfs_sysctl_register(void) -{ - xfs_table_header = register_sysctl_table(xfs_root_table); - if (!xfs_table_header) - return -ENOMEM; - return 0; -} - -void -xfs_sysctl_unregister(void) -{ - unregister_sysctl_table(xfs_table_header); -} diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h deleted file mode 100644 index b9937d450f8e..000000000000 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2001-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SYSCTL_H__ -#define __XFS_SYSCTL_H__ - -#include - -/* - * Tunable xfs parameters - */ - -typedef struct xfs_sysctl_val { - int min; - int val; - int max; -} xfs_sysctl_val_t; - -typedef struct xfs_param { - xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is - * not a member of parent dir GID. */ - xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ - xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ - xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ - xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ - xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ - xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ - xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ - xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ - xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ - xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ - xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ - xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ - xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ - xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ -} xfs_param_t; - -/* - * xfs_error_level: - * - * How much error reporting will be done when internal problems are - * encountered. These problems normally return an EFSCORRUPTED to their - * caller, with no other information reported. - * - * 0 No error reports - * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown - * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any - * additional errors that are known to not cause shutdowns) - * - * xfs_panic_mask bit 0x8 turns the error reports into panics - */ - -enum { - /* XFS_REFCACHE_SIZE = 1 */ - /* XFS_REFCACHE_PURGE = 2 */ - /* XFS_RESTRICT_CHOWN = 3 */ - XFS_SGID_INHERIT = 4, - XFS_SYMLINK_MODE = 5, - XFS_PANIC_MASK = 6, - XFS_ERRLEVEL = 7, - XFS_SYNCD_TIMER = 8, - /* XFS_PROBE_DMAPI = 9 */ - /* XFS_PROBE_IOOPS = 10 */ - /* XFS_PROBE_QUOTA = 11 */ - XFS_STATS_CLEAR = 12, - XFS_INHERIT_SYNC = 13, - XFS_INHERIT_NODUMP = 14, - XFS_INHERIT_NOATIME = 15, - XFS_BUF_TIMER = 16, - XFS_BUF_AGE = 17, - /* XFS_IO_BYPASS = 18 */ - XFS_INHERIT_NOSYM = 19, - XFS_ROTORSTEP = 20, - XFS_INHERIT_NODFRG = 21, - XFS_FILESTREAM_TIMER = 22, -}; - -extern xfs_param_t xfs_params; - -#ifdef CONFIG_SYSCTL -extern int xfs_sysctl_register(void); -extern void xfs_sysctl_unregister(void); -#else -# define xfs_sysctl_register() (0) -# define xfs_sysctl_unregister() do { } while (0) -#endif /* CONFIG_SYSCTL */ - -#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c deleted file mode 100644 index 9010ce885e6a..000000000000 --- a/fs/xfs/linux-2.6/xfs_trace.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2009, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_mount.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" -#include "xfs_alloc.h" -#include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_log_priv.h" -#include "xfs_buf_item.h" -#include "xfs_quota.h" -#include "xfs_iomap.h" -#include "xfs_aops.h" -#include "xfs_dquot_item.h" -#include "xfs_dquot.h" -#include "xfs_log_recover.h" -#include "xfs_inode_item.h" - -/* - * We include this last to have the helpers above available for the trace - * event implementations. - */ -#define CREATE_TRACE_POINTS -#include "xfs_trace.h" diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h deleted file mode 100644 index 690fc7a7bd72..000000000000 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ /dev/null @@ -1,1746 +0,0 @@ -/* - * Copyright (c) 2009, Christoph Hellwig - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM xfs - -#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_XFS_H - -#include - -struct xfs_agf; -struct xfs_alloc_arg; -struct xfs_attr_list_context; -struct xfs_buf_log_item; -struct xfs_da_args; -struct xfs_da_node_entry; -struct xfs_dquot; -struct xlog_ticket; -struct log; -struct xlog_recover; -struct xlog_recover_item; -struct xfs_buf_log_format; -struct xfs_inode_log_format; - -DECLARE_EVENT_CLASS(xfs_attr_list_class, - TP_PROTO(struct xfs_attr_list_context *ctx), - TP_ARGS(ctx), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(u32, hashval) - __field(u32, blkno) - __field(u32, offset) - __field(void *, alist) - __field(int, bufsize) - __field(int, count) - __field(int, firstu) - __field(int, dupcnt) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; - __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; - __entry->bufsize = ctx->bufsize; - __entry->count = ctx->count; - __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; - ), - TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->hashval, - __entry->blkno, - __entry->offset, - __entry->dupcnt, - __entry->alist, - __entry->bufsize, - __entry->count, - __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) - ) -) - -#define DEFINE_ATTR_LIST_EVENT(name) \ -DEFINE_EVENT(xfs_attr_list_class, name, \ - TP_PROTO(struct xfs_attr_list_context *ctx), \ - TP_ARGS(ctx)) -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); -DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); - -DECLARE_EVENT_CLASS(xfs_perag_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, - unsigned long caller_ip), - TP_ARGS(mp, agno, refcount, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(int, refcount) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->refcount = refcount; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d agno %u refcount %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->refcount, - (char *)__entry->caller_ip) -); - -#define DEFINE_PERAG_REF_EVENT(name) \ -DEFINE_EVENT(xfs_perag_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ - unsigned long caller_ip), \ - TP_ARGS(mp, agno, refcount, caller_ip)) -DEFINE_PERAG_REF_EVENT(xfs_perag_get); -DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); -DEFINE_PERAG_REF_EVENT(xfs_perag_put); -DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); -DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); - -TRACE_EVENT(xfs_attr_list_node_descend, - TP_PROTO(struct xfs_attr_list_context *ctx, - struct xfs_da_node_entry *btree), - TP_ARGS(ctx, btree), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(u32, hashval) - __field(u32, blkno) - __field(u32, offset) - __field(void *, alist) - __field(int, bufsize) - __field(int, count) - __field(int, firstu) - __field(int, dupcnt) - __field(int, flags) - __field(u32, bt_hashval) - __field(u32, bt_before) - ), - TP_fast_assign( - __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; - __entry->ino = ctx->dp->i_ino; - __entry->hashval = ctx->cursor->hashval; - __entry->blkno = ctx->cursor->blkno; - __entry->offset = ctx->cursor->offset; - __entry->alist = ctx->alist; - __entry->bufsize = ctx->bufsize; - __entry->count = ctx->count; - __entry->firstu = ctx->firstu; - __entry->flags = ctx->flags; - __entry->bt_hashval = be32_to_cpu(btree->hashval); - __entry->bt_before = be32_to_cpu(btree->before); - ), - TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " - "alist 0x%p size %u count %u firstu %u flags %d %s " - "node hashval %u, node before %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->hashval, - __entry->blkno, - __entry->offset, - __entry->dupcnt, - __entry->alist, - __entry->bufsize, - __entry->count, - __entry->firstu, - __entry->flags, - __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), - __entry->bt_hashval, - __entry->bt_before) -); - -TRACE_EVENT(xfs_iext_insert, - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, - struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), - TP_ARGS(ip, idx, r, state, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_extnum_t, idx) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - __field(xfs_exntst_t, state) - __field(int, bmap_state) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->idx = idx; - __entry->startoff = r->br_startoff; - __entry->startblock = r->br_startblock; - __entry->blockcount = r->br_blockcount; - __entry->state = r->br_state; - __entry->bmap_state = state; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), - (long)__entry->idx, - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount, - __entry->state, - (char *)__entry->caller_ip) -); - -DECLARE_EVENT_CLASS(xfs_bmap_class, - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, - unsigned long caller_ip), - TP_ARGS(ip, idx, state, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_extnum_t, idx) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - __field(xfs_exntst_t, state) - __field(int, bmap_state) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? - ip->i_afp : &ip->i_df; - struct xfs_bmbt_irec r; - - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->idx = idx; - __entry->startoff = r.br_startoff; - __entry->startblock = r.br_startblock; - __entry->blockcount = r.br_blockcount; - __entry->state = r.br_state; - __entry->bmap_state = state; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " - "offset %lld block %lld count %lld flag %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), - (long)__entry->idx, - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount, - __entry->state, - (char *)__entry->caller_ip) -) - -#define DEFINE_BMAP_EVENT(name) \ -DEFINE_EVENT(xfs_bmap_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ - unsigned long caller_ip), \ - TP_ARGS(ip, idx, state, caller_ip)) -DEFINE_BMAP_EVENT(xfs_iext_remove); -DEFINE_BMAP_EVENT(xfs_bmap_pre_update); -DEFINE_BMAP_EVENT(xfs_bmap_post_update); -DEFINE_BMAP_EVENT(xfs_extlist); - -DECLARE_EVENT_CLASS(xfs_buf_class, - TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), - TP_ARGS(bp, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(unsigned, flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->flags = bp->b_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_BUF_EVENT(name) \ -DEFINE_EVENT(xfs_buf_class, name, \ - TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ - TP_ARGS(bp, caller_ip)) -DEFINE_BUF_EVENT(xfs_buf_init); -DEFINE_BUF_EVENT(xfs_buf_free); -DEFINE_BUF_EVENT(xfs_buf_hold); -DEFINE_BUF_EVENT(xfs_buf_rele); -DEFINE_BUF_EVENT(xfs_buf_iodone); -DEFINE_BUF_EVENT(xfs_buf_iorequest); -DEFINE_BUF_EVENT(xfs_buf_bawrite); -DEFINE_BUF_EVENT(xfs_buf_bdwrite); -DEFINE_BUF_EVENT(xfs_buf_lock); -DEFINE_BUF_EVENT(xfs_buf_lock_done); -DEFINE_BUF_EVENT(xfs_buf_trylock); -DEFINE_BUF_EVENT(xfs_buf_unlock); -DEFINE_BUF_EVENT(xfs_buf_iowait); -DEFINE_BUF_EVENT(xfs_buf_iowait_done); -DEFINE_BUF_EVENT(xfs_buf_delwri_queue); -DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); -DEFINE_BUF_EVENT(xfs_buf_delwri_split); -DEFINE_BUF_EVENT(xfs_buf_get_uncached); -DEFINE_BUF_EVENT(xfs_bdstrat_shut); -DEFINE_BUF_EVENT(xfs_buf_item_relse); -DEFINE_BUF_EVENT(xfs_buf_item_iodone); -DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); -DEFINE_BUF_EVENT(xfs_buf_error_relse); -DEFINE_BUF_EVENT(xfs_trans_read_buf_io); -DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); - -/* not really buffer traces, but the buf provides useful information */ -DEFINE_BUF_EVENT(xfs_btree_corrupt); -DEFINE_BUF_EVENT(xfs_da_btree_corrupt); -DEFINE_BUF_EVENT(xfs_reset_dqcounts); -DEFINE_BUF_EVENT(xfs_inode_item_push); - -/* pass flags explicitly */ -DECLARE_EVENT_CLASS(xfs_buf_flags_class, - TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), - TP_ARGS(bp, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(unsigned, flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->flags = flags; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_BUF_FLAGS_EVENT(name) \ -DEFINE_EVENT(xfs_buf_flags_class, name, \ - TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ - TP_ARGS(bp, flags, caller_ip)) -DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); -DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); -DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); - -TRACE_EVENT(xfs_buf_ioerror, - TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), - TP_ARGS(bp, error, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, bno) - __field(size_t, buffer_length) - __field(unsigned, flags) - __field(int, hold) - __field(int, pincount) - __field(unsigned, lockval) - __field(int, error) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = bp->b_target->bt_dev; - __entry->bno = bp->b_bn; - __entry->buffer_length = bp->b_buffer_length; - __entry->hold = atomic_read(&bp->b_hold); - __entry->pincount = atomic_read(&bp->b_pin_count); - __entry->lockval = bp->b_sema.count; - __entry->error = error; - __entry->flags = bp->b_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d error %d flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->bno, - __entry->buffer_length, - __entry->hold, - __entry->pincount, - __entry->lockval, - __entry->error, - __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), - (void *)__entry->caller_ip) -); - -DECLARE_EVENT_CLASS(xfs_buf_item_class, - TP_PROTO(struct xfs_buf_log_item *bip), - TP_ARGS(bip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_daddr_t, buf_bno) - __field(size_t, buf_len) - __field(int, buf_hold) - __field(int, buf_pincount) - __field(int, buf_lockval) - __field(unsigned, buf_flags) - __field(unsigned, bli_recur) - __field(int, bli_refcount) - __field(unsigned, bli_flags) - __field(void *, li_desc) - __field(unsigned, li_flags) - ), - TP_fast_assign( - __entry->dev = bip->bli_buf->b_target->bt_dev; - __entry->bli_flags = bip->bli_flags; - __entry->bli_recur = bip->bli_recur; - __entry->bli_refcount = atomic_read(&bip->bli_refcount); - __entry->buf_bno = bip->bli_buf->b_bn; - __entry->buf_len = bip->bli_buf->b_buffer_length; - __entry->buf_flags = bip->bli_buf->b_flags; - __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); - __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); - __entry->buf_lockval = bip->bli_buf->b_sema.count; - __entry->li_desc = bip->bli_item.li_desc; - __entry->li_flags = bip->bli_item.li_flags; - ), - TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " - "lock %d flags %s recur %d refcount %d bliflags %s " - "lidesc 0x%p liflags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long long)__entry->buf_bno, - __entry->buf_len, - __entry->buf_hold, - __entry->buf_pincount, - __entry->buf_lockval, - __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), - __entry->bli_recur, - __entry->bli_refcount, - __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), - __entry->li_desc, - __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) -) - -#define DEFINE_BUF_ITEM_EVENT(name) \ -DEFINE_EVENT(xfs_buf_item_class, name, \ - TP_PROTO(struct xfs_buf_log_item *bip), \ - TP_ARGS(bip)) -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); -DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); -DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); -DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); -DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); -DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); -DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); - -DECLARE_EVENT_CLASS(xfs_lock_class, - TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, - unsigned long caller_ip), - TP_ARGS(ip, lock_flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, lock_flags) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->lock_flags = lock_flags; - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), - (void *)__entry->caller_ip) -) - -#define DEFINE_LOCK_EVENT(name) \ -DEFINE_EVENT(xfs_lock_class, name, \ - TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ - unsigned long caller_ip), \ - TP_ARGS(ip, lock_flags, caller_ip)) -DEFINE_LOCK_EVENT(xfs_ilock); -DEFINE_LOCK_EVENT(xfs_ilock_nowait); -DEFINE_LOCK_EVENT(xfs_ilock_demote); -DEFINE_LOCK_EVENT(xfs_iunlock); - -DECLARE_EVENT_CLASS(xfs_inode_class, - TP_PROTO(struct xfs_inode *ip), - TP_ARGS(ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - ), - TP_printk("dev %d:%d ino 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino) -) - -#define DEFINE_INODE_EVENT(name) \ -DEFINE_EVENT(xfs_inode_class, name, \ - TP_PROTO(struct xfs_inode *ip), \ - TP_ARGS(ip)) -DEFINE_INODE_EVENT(xfs_iget_skip); -DEFINE_INODE_EVENT(xfs_iget_reclaim); -DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); -DEFINE_INODE_EVENT(xfs_iget_hit); -DEFINE_INODE_EVENT(xfs_iget_miss); - -DEFINE_INODE_EVENT(xfs_getattr); -DEFINE_INODE_EVENT(xfs_setattr); -DEFINE_INODE_EVENT(xfs_readlink); -DEFINE_INODE_EVENT(xfs_alloc_file_space); -DEFINE_INODE_EVENT(xfs_free_file_space); -DEFINE_INODE_EVENT(xfs_readdir); -#ifdef CONFIG_XFS_POSIX_ACL -DEFINE_INODE_EVENT(xfs_get_acl); -#endif -DEFINE_INODE_EVENT(xfs_vm_bmap); -DEFINE_INODE_EVENT(xfs_file_ioctl); -DEFINE_INODE_EVENT(xfs_file_compat_ioctl); -DEFINE_INODE_EVENT(xfs_ioctl_setattr); -DEFINE_INODE_EVENT(xfs_file_fsync); -DEFINE_INODE_EVENT(xfs_destroy_inode); -DEFINE_INODE_EVENT(xfs_write_inode); -DEFINE_INODE_EVENT(xfs_evict_inode); - -DEFINE_INODE_EVENT(xfs_dquot_dqalloc); -DEFINE_INODE_EVENT(xfs_dquot_dqdetach); - -DECLARE_EVENT_CLASS(xfs_iref_class, - TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), - TP_ARGS(ip, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, count) - __field(int, pincount) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->count = atomic_read(&VFS_I(ip)->i_count); - __entry->pincount = atomic_read(&ip->i_pincount); - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->count, - __entry->pincount, - (char *)__entry->caller_ip) -) - -#define DEFINE_IREF_EVENT(name) \ -DEFINE_EVENT(xfs_iref_class, name, \ - TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ - TP_ARGS(ip, caller_ip)) -DEFINE_IREF_EVENT(xfs_ihold); -DEFINE_IREF_EVENT(xfs_irele); -DEFINE_IREF_EVENT(xfs_inode_pin); -DEFINE_IREF_EVENT(xfs_inode_unpin); -DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); - -DECLARE_EVENT_CLASS(xfs_namespace_class, - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), - TP_ARGS(dp, name), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, dp_ino) - __dynamic_array(char, name, name->len) - ), - TP_fast_assign( - __entry->dev = VFS_I(dp)->i_sb->s_dev; - __entry->dp_ino = dp->i_ino; - memcpy(__get_str(name), name->name, name->len); - ), - TP_printk("dev %d:%d dp ino 0x%llx name %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->dp_ino, - __get_str(name)) -) - -#define DEFINE_NAMESPACE_EVENT(name) \ -DEFINE_EVENT(xfs_namespace_class, name, \ - TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ - TP_ARGS(dp, name)) -DEFINE_NAMESPACE_EVENT(xfs_remove); -DEFINE_NAMESPACE_EVENT(xfs_link); -DEFINE_NAMESPACE_EVENT(xfs_lookup); -DEFINE_NAMESPACE_EVENT(xfs_create); -DEFINE_NAMESPACE_EVENT(xfs_symlink); - -TRACE_EVENT(xfs_rename, - TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, - struct xfs_name *src_name, struct xfs_name *target_name), - TP_ARGS(src_dp, target_dp, src_name, target_name), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, src_dp_ino) - __field(xfs_ino_t, target_dp_ino) - __dynamic_array(char, src_name, src_name->len) - __dynamic_array(char, target_name, target_name->len) - ), - TP_fast_assign( - __entry->dev = VFS_I(src_dp)->i_sb->s_dev; - __entry->src_dp_ino = src_dp->i_ino; - __entry->target_dp_ino = target_dp->i_ino; - memcpy(__get_str(src_name), src_name->name, src_name->len); - memcpy(__get_str(target_name), target_name->name, target_name->len); - ), - TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" - " src name %s target name %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->src_dp_ino, - __entry->target_dp_ino, - __get_str(src_name), - __get_str(target_name)) -) - -DECLARE_EVENT_CLASS(xfs_dquot_class, - TP_PROTO(struct xfs_dquot *dqp), - TP_ARGS(dqp), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(u32, id) - __field(unsigned, flags) - __field(unsigned, nrefs) - __field(unsigned long long, res_bcount) - __field(unsigned long long, bcount) - __field(unsigned long long, icount) - __field(unsigned long long, blk_hardlimit) - __field(unsigned long long, blk_softlimit) - __field(unsigned long long, ino_hardlimit) - __field(unsigned long long, ino_softlimit) - ), \ - TP_fast_assign( - __entry->dev = dqp->q_mount->m_super->s_dev; - __entry->id = be32_to_cpu(dqp->q_core.d_id); - __entry->flags = dqp->dq_flags; - __entry->nrefs = dqp->q_nrefs; - __entry->res_bcount = dqp->q_res_bcount; - __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); - __entry->icount = be64_to_cpu(dqp->q_core.d_icount); - __entry->blk_hardlimit = - be64_to_cpu(dqp->q_core.d_blk_hardlimit); - __entry->blk_softlimit = - be64_to_cpu(dqp->q_core.d_blk_softlimit); - __entry->ino_hardlimit = - be64_to_cpu(dqp->q_core.d_ino_hardlimit); - __entry->ino_softlimit = - be64_to_cpu(dqp->q_core.d_ino_softlimit); - ), - TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " - "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " - "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->id, - __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), - __entry->nrefs, - __entry->res_bcount, - __entry->bcount, - __entry->blk_hardlimit, - __entry->blk_softlimit, - __entry->icount, - __entry->ino_hardlimit, - __entry->ino_softlimit) -) - -#define DEFINE_DQUOT_EVENT(name) \ -DEFINE_EVENT(xfs_dquot_class, name, \ - TP_PROTO(struct xfs_dquot *dqp), \ - TP_ARGS(dqp)) -DEFINE_DQUOT_EVENT(xfs_dqadjust); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); -DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); -DEFINE_DQUOT_EVENT(xfs_dqattach_found); -DEFINE_DQUOT_EVENT(xfs_dqattach_get); -DEFINE_DQUOT_EVENT(xfs_dqinit); -DEFINE_DQUOT_EVENT(xfs_dqreuse); -DEFINE_DQUOT_EVENT(xfs_dqalloc); -DEFINE_DQUOT_EVENT(xfs_dqtobp_read); -DEFINE_DQUOT_EVENT(xfs_dqread); -DEFINE_DQUOT_EVENT(xfs_dqread_fail); -DEFINE_DQUOT_EVENT(xfs_dqlookup_found); -DEFINE_DQUOT_EVENT(xfs_dqlookup_want); -DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); -DEFINE_DQUOT_EVENT(xfs_dqlookup_done); -DEFINE_DQUOT_EVENT(xfs_dqget_hit); -DEFINE_DQUOT_EVENT(xfs_dqget_miss); -DEFINE_DQUOT_EVENT(xfs_dqput); -DEFINE_DQUOT_EVENT(xfs_dqput_wait); -DEFINE_DQUOT_EVENT(xfs_dqput_free); -DEFINE_DQUOT_EVENT(xfs_dqrele); -DEFINE_DQUOT_EVENT(xfs_dqflush); -DEFINE_DQUOT_EVENT(xfs_dqflush_force); -DEFINE_DQUOT_EVENT(xfs_dqflush_done); - -DECLARE_EVENT_CLASS(xfs_loggrant_class, - TP_PROTO(struct log *log, struct xlog_ticket *tic), - TP_ARGS(log, tic), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(unsigned, trans_type) - __field(char, ocnt) - __field(char, cnt) - __field(int, curr_res) - __field(int, unit_res) - __field(unsigned int, flags) - __field(int, reserveq) - __field(int, writeq) - __field(int, grant_reserve_cycle) - __field(int, grant_reserve_bytes) - __field(int, grant_write_cycle) - __field(int, grant_write_bytes) - __field(int, curr_cycle) - __field(int, curr_block) - __field(xfs_lsn_t, tail_lsn) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->trans_type = tic->t_trans_type; - __entry->ocnt = tic->t_ocnt; - __entry->cnt = tic->t_cnt; - __entry->curr_res = tic->t_curr_res; - __entry->unit_res = tic->t_unit_res; - __entry->flags = tic->t_flags; - __entry->reserveq = list_empty(&log->l_reserveq); - __entry->writeq = list_empty(&log->l_writeq); - xlog_crack_grant_head(&log->l_grant_reserve_head, - &__entry->grant_reserve_cycle, - &__entry->grant_reserve_bytes); - xlog_crack_grant_head(&log->l_grant_write_head, - &__entry->grant_write_cycle, - &__entry->grant_write_bytes); - __entry->curr_cycle = log->l_curr_cycle; - __entry->curr_block = log->l_curr_block; - __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); - ), - TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " - "t_unit_res %u t_flags %s reserveq %s " - "writeq %s grant_reserve_cycle %d " - "grant_reserve_bytes %d grant_write_cycle %d " - "grant_write_bytes %d curr_cycle %d curr_block %d " - "tail_cycle %d tail_block %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), - __entry->ocnt, - __entry->cnt, - __entry->curr_res, - __entry->unit_res, - __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), - __entry->reserveq ? "empty" : "active", - __entry->writeq ? "empty" : "active", - __entry->grant_reserve_cycle, - __entry->grant_reserve_bytes, - __entry->grant_write_cycle, - __entry->grant_write_bytes, - __entry->curr_cycle, - __entry->curr_block, - CYCLE_LSN(__entry->tail_lsn), - BLOCK_LSN(__entry->tail_lsn) - ) -) - -#define DEFINE_LOGGRANT_EVENT(name) \ -DEFINE_EVENT(xfs_loggrant_class, name, \ - TP_PROTO(struct log *log, struct xlog_ticket *tic), \ - TP_ARGS(log, tic)) -DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); -DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); -DEFINE_LOGGRANT_EVENT(xfs_log_reserve); -DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); -DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); - -DECLARE_EVENT_CLASS(xfs_file_class, - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), - TP_ARGS(ip, count, offset, flags), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fsize_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - __entry->flags = flags; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " - "offset 0x%llx count 0x%zx ioflags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size, - __entry->offset, - __entry->count, - __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) -) - -#define DEFINE_RW_EVENT(name) \ -DEFINE_EVENT(xfs_file_class, name, \ - TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ - TP_ARGS(ip, count, offset, flags)) -DEFINE_RW_EVENT(xfs_file_read); -DEFINE_RW_EVENT(xfs_file_buffered_write); -DEFINE_RW_EVENT(xfs_file_direct_write); -DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write); - -DECLARE_EVENT_CLASS(xfs_page_class, - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), - TP_ARGS(inode, page, off), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(pgoff_t, pgoff) - __field(loff_t, size) - __field(unsigned long, offset) - __field(int, delalloc) - __field(int, unwritten) - ), - TP_fast_assign( - int delalloc = -1, unwritten = -1; - - if (page_has_buffers(page)) - xfs_count_page_state(page, &delalloc, &unwritten); - __entry->dev = inode->i_sb->s_dev; - __entry->ino = XFS_I(inode)->i_ino; - __entry->pgoff = page_offset(page); - __entry->size = i_size_read(inode); - __entry->offset = off; - __entry->delalloc = delalloc; - __entry->unwritten = unwritten; - ), - TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " - "delalloc %d unwritten %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->pgoff, - __entry->size, - __entry->offset, - __entry->delalloc, - __entry->unwritten) -) - -#define DEFINE_PAGE_EVENT(name) \ -DEFINE_EVENT(xfs_page_class, name, \ - TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ - TP_ARGS(inode, page, off)) -DEFINE_PAGE_EVENT(xfs_writepage); -DEFINE_PAGE_EVENT(xfs_releasepage); -DEFINE_PAGE_EVENT(xfs_invalidatepage); - -DECLARE_EVENT_CLASS(xfs_imap_class, - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, - int type, struct xfs_bmbt_irec *irec), - TP_ARGS(ip, offset, count, type, irec), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(loff_t, size) - __field(loff_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - __field(int, type) - __field(xfs_fileoff_t, startoff) - __field(xfs_fsblock_t, startblock) - __field(xfs_filblks_t, blockcount) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - __entry->type = type; - __entry->startoff = irec ? irec->br_startoff : 0; - __entry->startblock = irec ? irec->br_startblock : 0; - __entry->blockcount = irec ? irec->br_blockcount : 0; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " - "offset 0x%llx count %zd type %s " - "startoff 0x%llx startblock %lld blockcount 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size, - __entry->offset, - __entry->count, - __print_symbolic(__entry->type, XFS_IO_TYPES), - __entry->startoff, - (__int64_t)__entry->startblock, - __entry->blockcount) -) - -#define DEFINE_IOMAP_EVENT(name) \ -DEFINE_EVENT(xfs_imap_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ - int type, struct xfs_bmbt_irec *irec), \ - TP_ARGS(ip, offset, count, type, irec)) -DEFINE_IOMAP_EVENT(xfs_map_blocks_found); -DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_get_blocks_found); -DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); - -DECLARE_EVENT_CLASS(xfs_simple_io_class, - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), - TP_ARGS(ip, offset, count), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(loff_t, isize) - __field(loff_t, disize) - __field(loff_t, new_size) - __field(loff_t, offset) - __field(size_t, count) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->isize = ip->i_size; - __entry->disize = ip->i_d.di_size; - __entry->new_size = ip->i_new_size; - __entry->offset = offset; - __entry->count = count; - ), - TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " - "offset 0x%llx count %zd", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->isize, - __entry->disize, - __entry->new_size, - __entry->offset, - __entry->count) -); - -#define DEFINE_SIMPLE_IO_EVENT(name) \ -DEFINE_EVENT(xfs_simple_io_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ - TP_ARGS(ip, offset, count)) -DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); -DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); -DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); -DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); - -DECLARE_EVENT_CLASS(xfs_itrunc_class, - TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), - TP_ARGS(ip, new_size), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fsize_t, new_size) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->new_size = new_size; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->new_size) -) - -#define DEFINE_ITRUNC_EVENT(name) \ -DEFINE_EVENT(xfs_itrunc_class, name, \ - TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ - TP_ARGS(ip, new_size)) -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); -DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); - -TRACE_EVENT(xfs_pagecache_inval, - TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), - TP_ARGS(ip, start, finish), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_off_t, start) - __field(xfs_off_t, finish) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->start = start; - __entry->finish = finish; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->start, - __entry->finish) -); - -TRACE_EVENT(xfs_bunmap, - TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, - int flags, unsigned long caller_ip), - TP_ARGS(ip, bno, len, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(xfs_fsize_t, size) - __field(xfs_fileoff_t, bno) - __field(xfs_filblks_t, len) - __field(unsigned long, caller_ip) - __field(int, flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->size = ip->i_d.di_size; - __entry->bno = bno; - __entry->len = len; - __entry->caller_ip = caller_ip; - __entry->flags = flags; - ), - TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" - "flags %s caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->bno, - __entry->len, - __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), - (void *)__entry->caller_ip) - -); - -DECLARE_EVENT_CLASS(xfs_busy_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, agbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len) -); -#define DEFINE_BUSY_EVENT(name) \ -DEFINE_EVENT(xfs_busy_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - xfs_agblock_t agbno, xfs_extlen_t len), \ - TP_ARGS(mp, agno, agbno, len)) -DEFINE_BUSY_EVENT(xfs_alloc_busy); -DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); -DEFINE_BUSY_EVENT(xfs_alloc_busy_force); -DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); -DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); - -TRACE_EVENT(xfs_alloc_busy_trim, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len, - xfs_agblock_t tbno, xfs_extlen_t tlen), - TP_ARGS(mp, agno, agbno, len, tbno, tlen), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - __field(xfs_agblock_t, tbno) - __field(xfs_extlen_t, tlen) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - __entry->tbno = tbno; - __entry->tlen = tlen; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len, - __entry->tbno, - __entry->tlen) -); - -TRACE_EVENT(xfs_trans_commit_lsn, - TP_PROTO(struct xfs_trans *trans), - TP_ARGS(trans), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(struct xfs_trans *, tp) - __field(xfs_lsn_t, lsn) - ), - TP_fast_assign( - __entry->dev = trans->t_mountp->m_super->s_dev; - __entry->tp = trans; - __entry->lsn = trans->t_commit_lsn; - ), - TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tp, - __entry->lsn) -); - -TRACE_EVENT(xfs_agf, - TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, - unsigned long caller_ip), - TP_ARGS(mp, agf, flags, caller_ip), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(int, flags) - __field(__u32, length) - __field(__u32, bno_root) - __field(__u32, cnt_root) - __field(__u32, bno_level) - __field(__u32, cnt_level) - __field(__u32, flfirst) - __field(__u32, fllast) - __field(__u32, flcount) - __field(__u32, freeblks) - __field(__u32, longest) - __field(unsigned long, caller_ip) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = be32_to_cpu(agf->agf_seqno), - __entry->flags = flags; - __entry->length = be32_to_cpu(agf->agf_length), - __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), - __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), - __entry->bno_level = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), - __entry->cnt_level = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), - __entry->flfirst = be32_to_cpu(agf->agf_flfirst), - __entry->fllast = be32_to_cpu(agf->agf_fllast), - __entry->flcount = be32_to_cpu(agf->agf_flcount), - __entry->freeblks = be32_to_cpu(agf->agf_freeblks), - __entry->longest = be32_to_cpu(agf->agf_longest); - __entry->caller_ip = caller_ip; - ), - TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " - "levels b %u c %u flfirst %u fllast %u flcount %u " - "freeblks %u longest %u caller %pf", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), - __entry->length, - __entry->bno_root, - __entry->cnt_root, - __entry->bno_level, - __entry->cnt_level, - __entry->flfirst, - __entry->fllast, - __entry->flcount, - __entry->freeblks, - __entry->longest, - (void *)__entry->caller_ip) -); - -TRACE_EVENT(xfs_free_extent, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, - xfs_extlen_t len, bool isfl, int haveleft, int haveright), - TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - __field(int, isfl) - __field(int, haveleft) - __field(int, haveright) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - __entry->isfl = isfl; - __entry->haveleft = haveleft; - __entry->haveright = haveright; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len, - __entry->isfl, - __entry->haveleft ? - (__entry->haveright ? "both" : "left") : - (__entry->haveright ? "right" : "none")) - -); - -DECLARE_EVENT_CLASS(xfs_alloc_class, - TP_PROTO(struct xfs_alloc_arg *args), - TP_ARGS(args), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, minlen) - __field(xfs_extlen_t, maxlen) - __field(xfs_extlen_t, mod) - __field(xfs_extlen_t, prod) - __field(xfs_extlen_t, minleft) - __field(xfs_extlen_t, total) - __field(xfs_extlen_t, alignment) - __field(xfs_extlen_t, minalignslop) - __field(xfs_extlen_t, len) - __field(short, type) - __field(short, otype) - __field(char, wasdel) - __field(char, wasfromfl) - __field(char, isfl) - __field(char, userdata) - __field(xfs_fsblock_t, firstblock) - ), - TP_fast_assign( - __entry->dev = args->mp->m_super->s_dev; - __entry->agno = args->agno; - __entry->agbno = args->agbno; - __entry->minlen = args->minlen; - __entry->maxlen = args->maxlen; - __entry->mod = args->mod; - __entry->prod = args->prod; - __entry->minleft = args->minleft; - __entry->total = args->total; - __entry->alignment = args->alignment; - __entry->minalignslop = args->minalignslop; - __entry->len = args->len; - __entry->type = args->type; - __entry->otype = args->otype; - __entry->wasdel = args->wasdel; - __entry->wasfromfl = args->wasfromfl; - __entry->isfl = args->isfl; - __entry->userdata = args->userdata; - __entry->firstblock = args->firstblock; - ), - TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " - "prod %u minleft %u total %u alignment %u minalignslop %u " - "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " - "userdata %d firstblock 0x%llx", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->minlen, - __entry->maxlen, - __entry->mod, - __entry->prod, - __entry->minleft, - __entry->total, - __entry->alignment, - __entry->minalignslop, - __entry->len, - __print_symbolic(__entry->type, XFS_ALLOC_TYPES), - __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), - __entry->wasdel, - __entry->wasfromfl, - __entry->isfl, - __entry->userdata, - (unsigned long long)__entry->firstblock) -) - -#define DEFINE_ALLOC_EVENT(name) \ -DEFINE_EVENT(xfs_alloc_class, name, \ - TP_PROTO(struct xfs_alloc_arg *args), \ - TP_ARGS(args)) -DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); -DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); -DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); -DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); -DEFINE_ALLOC_EVENT(xfs_alloc_near_first); -DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); -DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); -DEFINE_ALLOC_EVENT(xfs_alloc_near_error); -DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); -DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); -DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); -DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); -DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); -DEFINE_ALLOC_EVENT(xfs_alloc_size_done); -DEFINE_ALLOC_EVENT(xfs_alloc_size_error); -DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); -DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); -DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); -DEFINE_ALLOC_EVENT(xfs_alloc_small_done); -DEFINE_ALLOC_EVENT(xfs_alloc_small_error); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); -DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); - -DECLARE_EVENT_CLASS(xfs_dir2_class, - TP_PROTO(struct xfs_da_args *args), - TP_ARGS(args), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __dynamic_array(char, name, args->namelen) - __field(int, namelen) - __field(xfs_dahash_t, hashval) - __field(xfs_ino_t, inumber) - __field(int, op_flags) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - if (args->namelen) - memcpy(__get_str(name), args->name, args->namelen); - __entry->namelen = args->namelen; - __entry->hashval = args->hashval; - __entry->inumber = args->inumber; - __entry->op_flags = args->op_flags; - ), - TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " - "inumber 0x%llx op_flags %s", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->namelen, - __entry->namelen ? __get_str(name) : NULL, - __entry->namelen, - __entry->hashval, - __entry->inumber, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) -) - -#define DEFINE_DIR2_EVENT(name) \ -DEFINE_EVENT(xfs_dir2_class, name, \ - TP_PROTO(struct xfs_da_args *args), \ - TP_ARGS(args)) -DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); -DEFINE_DIR2_EVENT(xfs_dir2_sf_create); -DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); -DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); -DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); -DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); -DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); -DEFINE_DIR2_EVENT(xfs_dir2_block_addname); -DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_block_replace); -DEFINE_DIR2_EVENT(xfs_dir2_block_removename); -DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); -DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); -DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); -DEFINE_DIR2_EVENT(xfs_dir2_node_addname); -DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); -DEFINE_DIR2_EVENT(xfs_dir2_node_replace); -DEFINE_DIR2_EVENT(xfs_dir2_node_removename); -DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); - -DECLARE_EVENT_CLASS(xfs_dir2_space_class, - TP_PROTO(struct xfs_da_args *args, int idx), - TP_ARGS(args, idx), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, op_flags) - __field(int, idx) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - __entry->op_flags = args->op_flags; - __entry->idx = idx; - ), - TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), - __entry->idx) -) - -#define DEFINE_DIR2_SPACE_EVENT(name) \ -DEFINE_EVENT(xfs_dir2_space_class, name, \ - TP_PROTO(struct xfs_da_args *args, int idx), \ - TP_ARGS(args, idx)) -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); -DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); - -TRACE_EVENT(xfs_dir2_leafn_moveents, - TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), - TP_ARGS(args, src_idx, dst_idx, count), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(int, op_flags) - __field(int, src_idx) - __field(int, dst_idx) - __field(int, count) - ), - TP_fast_assign( - __entry->dev = VFS_I(args->dp)->i_sb->s_dev; - __entry->ino = args->dp->i_ino; - __entry->op_flags = args->op_flags; - __entry->src_idx = src_idx; - __entry->dst_idx = dst_idx; - __entry->count = count; - ), - TP_printk("dev %d:%d ino 0x%llx op_flags %s " - "src_idx %d dst_idx %d count %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), - __entry->src_idx, - __entry->dst_idx, - __entry->count) -); - -#define XFS_SWAPEXT_INODES \ - { 0, "target" }, \ - { 1, "temp" } - -#define XFS_INODE_FORMAT_STR \ - { 0, "invalid" }, \ - { 1, "local" }, \ - { 2, "extent" }, \ - { 3, "btree" } - -DECLARE_EVENT_CLASS(xfs_swap_extent_class, - TP_PROTO(struct xfs_inode *ip, int which), - TP_ARGS(ip, which), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(int, which) - __field(xfs_ino_t, ino) - __field(int, format) - __field(int, nex) - __field(int, max_nex) - __field(int, broot_size) - __field(int, fork_off) - ), - TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->which = which; - __entry->ino = ip->i_ino; - __entry->format = ip->i_d.di_format; - __entry->nex = ip->i_d.di_nextents; - __entry->max_nex = ip->i_df.if_ext_max; - __entry->broot_size = ip->i_df.if_broot_bytes; - __entry->fork_off = XFS_IFORK_BOFF(ip); - ), - TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " - "Max in-fork extents %d, broot size %d, fork offset %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), - __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), - __entry->nex, - __entry->max_nex, - __entry->broot_size, - __entry->fork_off) -) - -#define DEFINE_SWAPEXT_EVENT(name) \ -DEFINE_EVENT(xfs_swap_extent_class, name, \ - TP_PROTO(struct xfs_inode *ip, int which), \ - TP_ARGS(ip, which)) - -DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); -DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); - -DECLARE_EVENT_CLASS(xfs_log_recover_item_class, - TP_PROTO(struct log *log, struct xlog_recover *trans, - struct xlog_recover_item *item, int pass), - TP_ARGS(log, trans, item, pass), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(unsigned long, item) - __field(xlog_tid_t, tid) - __field(int, type) - __field(int, pass) - __field(int, count) - __field(int, total) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->item = (unsigned long)item; - __entry->tid = trans->r_log_tid; - __entry->type = ITEM_TYPE(item); - __entry->pass = pass; - __entry->count = item->ri_cnt; - __entry->total = item->ri_total; - ), - TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " - "item region count/total %d/%d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->tid, - __entry->pass, - (void *)__entry->item, - __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), - __entry->count, - __entry->total) -) - -#define DEFINE_LOG_RECOVER_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_item_class, name, \ - TP_PROTO(struct log *log, struct xlog_recover *trans, \ - struct xlog_recover_item *item, int pass), \ - TP_ARGS(log, trans, item, pass)) - -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); -DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); - -DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), - TP_ARGS(log, buf_f), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(__int64_t, blkno) - __field(unsigned short, len) - __field(unsigned short, flags) - __field(unsigned short, size) - __field(unsigned int, map_size) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->blkno = buf_f->blf_blkno; - __entry->len = buf_f->blf_len; - __entry->flags = buf_f->blf_flags; - __entry->size = buf_f->blf_size; - __entry->map_size = buf_f->blf_map_size; - ), - TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " - "map_size %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->blkno, - __entry->len, - __entry->flags, - __entry->size, - __entry->map_size) -) - -#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ - TP_ARGS(log, buf_f)) - -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); -DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); - -DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), - TP_ARGS(log, in_f), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_ino_t, ino) - __field(unsigned short, size) - __field(int, fields) - __field(unsigned short, asize) - __field(unsigned short, dsize) - __field(__int64_t, blkno) - __field(int, len) - __field(int, boffset) - ), - TP_fast_assign( - __entry->dev = log->l_mp->m_super->s_dev; - __entry->ino = in_f->ilf_ino; - __entry->size = in_f->ilf_size; - __entry->fields = in_f->ilf_fields; - __entry->asize = in_f->ilf_asize; - __entry->dsize = in_f->ilf_dsize; - __entry->blkno = in_f->ilf_blkno; - __entry->len = in_f->ilf_len; - __entry->boffset = in_f->ilf_boffset; - ), - TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " - "dsize %d, blkno 0x%llx, len %d, boffset %d", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->ino, - __entry->size, - __entry->fields, - __entry->asize, - __entry->dsize, - __entry->blkno, - __entry->len, - __entry->boffset) -) -#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ -DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ - TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ - TP_ARGS(log, in_f)) - -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); -DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); - -DECLARE_EVENT_CLASS(xfs_discard_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno, xfs_extlen_t len), - TP_ARGS(mp, agno, agbno, len), - TP_STRUCT__entry( - __field(dev_t, dev) - __field(xfs_agnumber_t, agno) - __field(xfs_agblock_t, agbno) - __field(xfs_extlen_t, len) - ), - TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->agbno = agbno; - __entry->len = len; - ), - TP_printk("dev %d:%d agno %u agbno %u len %u\n", - MAJOR(__entry->dev), MINOR(__entry->dev), - __entry->agno, - __entry->agbno, - __entry->len) -) - -#define DEFINE_DISCARD_EVENT(name) \ -DEFINE_EVENT(xfs_discard_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ - xfs_agblock_t agbno, xfs_extlen_t len), \ - TP_ARGS(mp, agno, agbno, len)) -DEFINE_DISCARD_EVENT(xfs_discard_extent); -DEFINE_DISCARD_EVENT(xfs_discard_toosmall); -DEFINE_DISCARD_EVENT(xfs_discard_exclude); -DEFINE_DISCARD_EVENT(xfs_discard_busy); - -#endif /* _TRACE_XFS_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE xfs_trace -#include diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h deleted file mode 100644 index 7c220b4227bc..000000000000 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VNODE_H__ -#define __XFS_VNODE_H__ - -#include "xfs_fs.h" - -struct file; -struct xfs_inode; -struct xfs_iomap; -struct attrlist_cursor_kern; - -/* - * Return values for xfs_inactive. A return value of - * VN_INACTIVE_NOCACHE implies that the file system behavior - * has disassociated its state and bhv_desc_t from the vnode. - */ -#define VN_INACTIVE_CACHE 0 -#define VN_INACTIVE_NOCACHE 1 - -/* - * Flags for read/write calls - same values as IRIX - */ -#define IO_ISDIRECT 0x00004 /* bypass page cache */ -#define IO_INVIS 0x00020 /* don't update inode timestamps */ - -#define XFS_IO_FLAGS \ - { IO_ISDIRECT, "DIRECT" }, \ - { IO_INVIS, "INVIS"} - -/* - * Flush/Invalidate options for vop_toss/flush/flushinval_pages. - */ -#define FI_NONE 0 /* none */ -#define FI_REMAPF 1 /* Do a remapf prior to the operation */ -#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. - Prevent VM access to the pages until - the operation completes. */ - -/* - * Some useful predicates. - */ -#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) -#define VN_CACHED(vp) (vp->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ - PAGECACHE_TAG_DIRTY) - - -#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c deleted file mode 100644 index 87d3e03878c8..000000000000 --- a/fs/xfs/linux-2.6/xfs_xattr.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2008 Christoph Hellwig. - * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "xfs.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_attr.h" -#include "xfs_attr_leaf.h" -#include "xfs_acl.h" -#include "xfs_vnodeops.h" - -#include -#include - - -static int -xfs_xattr_get(struct dentry *dentry, const char *name, - void *value, size_t size, int xflags) -{ - struct xfs_inode *ip = XFS_I(dentry->d_inode); - int error, asize = size; - - if (strcmp(name, "") == 0) - return -EINVAL; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (!size) { - xflags |= ATTR_KERNOVAL; - value = NULL; - } - - error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); - if (error) - return error; - return asize; -} - -static int -xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags, int xflags) -{ - struct xfs_inode *ip = XFS_I(dentry->d_inode); - - if (strcmp(name, "") == 0) - return -EINVAL; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (flags & XATTR_CREATE) - xflags |= ATTR_CREATE; - if (flags & XATTR_REPLACE) - xflags |= ATTR_REPLACE; - - if (!value) - return -xfs_attr_remove(ip, (unsigned char *)name, xflags); - return -xfs_attr_set(ip, (unsigned char *)name, - (void *)value, size, xflags); -} - -static const struct xattr_handler xfs_xattr_user_handler = { - .prefix = XATTR_USER_PREFIX, - .flags = 0, /* no flags implies user namespace */ - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -static const struct xattr_handler xfs_xattr_trusted_handler = { - .prefix = XATTR_TRUSTED_PREFIX, - .flags = ATTR_ROOT, - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -static const struct xattr_handler xfs_xattr_security_handler = { - .prefix = XATTR_SECURITY_PREFIX, - .flags = ATTR_SECURE, - .get = xfs_xattr_get, - .set = xfs_xattr_set, -}; - -const struct xattr_handler *xfs_xattr_handlers[] = { - &xfs_xattr_user_handler, - &xfs_xattr_trusted_handler, - &xfs_xattr_security_handler, -#ifdef CONFIG_XFS_POSIX_ACL - &xfs_xattr_acl_access_handler, - &xfs_xattr_acl_default_handler, -#endif - NULL -}; - -static unsigned int xfs_xattr_prefix_len(int flags) -{ - if (flags & XFS_ATTR_SECURE) - return sizeof("security"); - else if (flags & XFS_ATTR_ROOT) - return sizeof("trusted"); - else - return sizeof("user"); -} - -static const char *xfs_xattr_prefix(int flags) -{ - if (flags & XFS_ATTR_SECURE) - return xfs_xattr_security_handler.prefix; - else if (flags & XFS_ATTR_ROOT) - return xfs_xattr_trusted_handler.prefix; - else - return xfs_xattr_user_handler.prefix; -} - -static int -xfs_xattr_put_listent( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen, - unsigned char *value) -{ - unsigned int prefix_len = xfs_xattr_prefix_len(flags); - char *offset; - int arraytop; - - ASSERT(context->count >= 0); - - /* - * Only show root namespace entries if we are actually allowed to - * see them. - */ - if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN)) - return 0; - - arraytop = context->count + prefix_len + namelen + 1; - if (arraytop > context->firstu) { - context->count = -1; /* insufficient space */ - return 1; - } - offset = (char *)context->alist + context->count; - strncpy(offset, xfs_xattr_prefix(flags), prefix_len); - offset += prefix_len; - strncpy(offset, (char *)name, namelen); /* real name */ - offset += namelen; - *offset = '\0'; - context->count += prefix_len + namelen + 1; - return 0; -} - -static int -xfs_xattr_put_listent_sizes( - struct xfs_attr_list_context *context, - int flags, - unsigned char *name, - int namelen, - int valuelen, - unsigned char *value) -{ - context->count += xfs_xattr_prefix_len(flags) + namelen + 1; - return 0; -} - -static int -list_one_attr(const char *name, const size_t len, void *data, - size_t size, ssize_t *result) -{ - char *p = data + *result; - - *result += len; - if (!size) - return 0; - if (*result > size) - return -ERANGE; - - strcpy(p, name); - return 0; -} - -ssize_t -xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) -{ - struct xfs_attr_list_context context; - struct attrlist_cursor_kern cursor = { 0 }; - struct inode *inode = dentry->d_inode; - int error; - - /* - * First read the regular on-disk attributes. - */ - memset(&context, 0, sizeof(context)); - context.dp = XFS_I(inode); - context.cursor = &cursor; - context.resynch = 1; - context.alist = data; - context.bufsize = size; - context.firstu = context.bufsize; - - if (size) - context.put_listent = xfs_xattr_put_listent; - else - context.put_listent = xfs_xattr_put_listent_sizes; - - xfs_attr_list_int(&context); - if (context.count < 0) - return -ERANGE; - - /* - * Then add the two synthetic ACL attributes. - */ - if (posix_acl_access_exists(inode)) { - error = list_one_attr(POSIX_ACL_XATTR_ACCESS, - strlen(POSIX_ACL_XATTR_ACCESS) + 1, - data, size, &context.count); - if (error) - return error; - } - - if (posix_acl_default_exists(inode)) { - error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, - strlen(POSIX_ACL_XATTR_DEFAULT) + 1, - data, size, &context.count); - if (error) - return error; - } - - return context.count; -} diff --git a/fs/xfs/mrlock.h b/fs/xfs/mrlock.h new file mode 100644 index 000000000000..ff6a19873e5c --- /dev/null +++ b/fs/xfs/mrlock.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_MRLOCK_H__ +#define __XFS_SUPPORT_MRLOCK_H__ + +#include + +typedef struct { + struct rw_semaphore mr_lock; +#ifdef DEBUG + int mr_writer; +#endif +} mrlock_t; + +#ifdef DEBUG +#define mrinit(mrp, name) \ + do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0) +#else +#define mrinit(mrp, name) \ + do { init_rwsem(&(mrp)->mr_lock); } while (0) +#endif + +#define mrlock_init(mrp, t,n,s) mrinit(mrp, n) +#define mrfree(mrp) do { } while (0) + +static inline void mraccess_nested(mrlock_t *mrp, int subclass) +{ + down_read_nested(&mrp->mr_lock, subclass); +} + +static inline void mrupdate_nested(mrlock_t *mrp, int subclass) +{ + down_write_nested(&mrp->mr_lock, subclass); +#ifdef DEBUG + mrp->mr_writer = 1; +#endif +} + +static inline int mrtryaccess(mrlock_t *mrp) +{ + return down_read_trylock(&mrp->mr_lock); +} + +static inline int mrtryupdate(mrlock_t *mrp) +{ + if (!down_write_trylock(&mrp->mr_lock)) + return 0; +#ifdef DEBUG + mrp->mr_writer = 1; +#endif + return 1; +} + +static inline void mrunlock_excl(mrlock_t *mrp) +{ +#ifdef DEBUG + mrp->mr_writer = 0; +#endif + up_write(&mrp->mr_lock); +} + +static inline void mrunlock_shared(mrlock_t *mrp) +{ + up_read(&mrp->mr_lock); +} + +static inline void mrdemote(mrlock_t *mrp) +{ +#ifdef DEBUG + mrp->mr_writer = 0; +#endif + downgrade_write(&mrp->mr_lock); +} + +#endif /* __XFS_SUPPORT_MRLOCK_H__ */ diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c deleted file mode 100644 index db62959bed13..000000000000 --- a/fs/xfs/quota/xfs_dquot.c +++ /dev/null @@ -1,1454 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_space.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - - -/* - LOCK ORDER - - inode lock (ilock) - dquot hash-chain lock (hashlock) - xqm dquot freelist lock (freelistlock - mount's dquot list lock (mplistlock) - user dquot lock - lock ordering among dquots is based on the uid or gid - group dquot lock - similar to udquots. Between the two dquots, the udquot - has to be locked first. - pin lock - the dquot lock must be held to take this lock. - flush lock - ditto. -*/ - -#ifdef DEBUG -xfs_buftarg_t *xfs_dqerror_target; -int xfs_do_dqerror; -int xfs_dqreq_num; -int xfs_dqerror_mod = 33; -#endif - -static struct lock_class_key xfs_dquot_other_class; - -/* - * Allocate and initialize a dquot. We don't always allocate fresh memory; - * we try to reclaim a free dquot if the number of incore dquots are above - * a threshold. - * The only field inside the core that gets initialized at this point - * is the d_id field. The idea is to fill in the entire q_core - * when we read in the on disk dquot. - */ -STATIC xfs_dquot_t * -xfs_qm_dqinit( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type) -{ - xfs_dquot_t *dqp; - boolean_t brandnewdquot; - - brandnewdquot = xfs_qm_dqalloc_incore(&dqp); - dqp->dq_flags = type; - dqp->q_core.d_id = cpu_to_be32(id); - dqp->q_mount = mp; - - /* - * No need to re-initialize these if this is a reclaimed dquot. - */ - if (brandnewdquot) { - INIT_LIST_HEAD(&dqp->q_freelist); - mutex_init(&dqp->q_qlock); - init_waitqueue_head(&dqp->q_pinwait); - - /* - * Because we want to use a counting completion, complete - * the flush completion once to allow a single access to - * the flush completion without blocking. - */ - init_completion(&dqp->q_flush); - complete(&dqp->q_flush); - - trace_xfs_dqinit(dqp); - } else { - /* - * Only the q_core portion was zeroed in dqreclaim_one(). - * So, we need to reset others. - */ - dqp->q_nrefs = 0; - dqp->q_blkno = 0; - INIT_LIST_HEAD(&dqp->q_mplist); - INIT_LIST_HEAD(&dqp->q_hashlist); - dqp->q_bufoffset = 0; - dqp->q_fileoffset = 0; - dqp->q_transp = NULL; - dqp->q_gdquot = NULL; - dqp->q_res_bcount = 0; - dqp->q_res_icount = 0; - dqp->q_res_rtbcount = 0; - atomic_set(&dqp->q_pincount, 0); - dqp->q_hash = NULL; - ASSERT(list_empty(&dqp->q_freelist)); - - trace_xfs_dqreuse(dqp); - } - - /* - * In either case we need to make sure group quotas have a different - * lock class than user quotas, to make sure lockdep knows we can - * locks of one of each at the same time. - */ - if (!(type & XFS_DQ_USER)) - lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); - - /* - * log item gets initialized later - */ - return (dqp); -} - -/* - * This is called to free all the memory associated with a dquot - */ -void -xfs_qm_dqdestroy( - xfs_dquot_t *dqp) -{ - ASSERT(list_empty(&dqp->q_freelist)); - - mutex_destroy(&dqp->q_qlock); - kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); - - atomic_dec(&xfs_Gqm->qm_totaldquots); -} - -/* - * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. - */ -STATIC void -xfs_qm_dqinit_core( - xfs_dqid_t id, - uint type, - xfs_dqblk_t *d) -{ - /* - * Caller has zero'd the entire dquot 'chunk' already. - */ - d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); - d->dd_diskdq.d_version = XFS_DQUOT_VERSION; - d->dd_diskdq.d_id = cpu_to_be32(id); - d->dd_diskdq.d_flags = type; -} - -/* - * If default limits are in force, push them into the dquot now. - * We overwrite the dquot limits only if they are zero and this - * is not the root dquot. - */ -void -xfs_qm_adjust_dqlimits( - xfs_mount_t *mp, - xfs_disk_dquot_t *d) -{ - xfs_quotainfo_t *q = mp->m_quotainfo; - - ASSERT(d->d_id); - - if (q->qi_bsoftlimit && !d->d_blk_softlimit) - d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); - if (q->qi_bhardlimit && !d->d_blk_hardlimit) - d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); - if (q->qi_isoftlimit && !d->d_ino_softlimit) - d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); - if (q->qi_ihardlimit && !d->d_ino_hardlimit) - d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); - if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) - d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); - if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) - d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); -} - -/* - * Check the limits and timers of a dquot and start or reset timers - * if necessary. - * This gets called even when quota enforcement is OFF, which makes our - * life a little less complicated. (We just don't reject any quota - * reservations in that case, when enforcement is off). - * We also return 0 as the values of the timers in Q_GETQUOTA calls, when - * enforcement's off. - * In contrast, warnings are a little different in that they don't - * 'automatically' get started when limits get exceeded. They do - * get reset to zero, however, when we find the count to be under - * the soft limit (they are only ever set non-zero via userspace). - */ -void -xfs_qm_adjust_dqtimers( - xfs_mount_t *mp, - xfs_disk_dquot_t *d) -{ - ASSERT(d->d_id); - -#ifdef DEBUG - if (d->d_blk_hardlimit) - ASSERT(be64_to_cpu(d->d_blk_softlimit) <= - be64_to_cpu(d->d_blk_hardlimit)); - if (d->d_ino_hardlimit) - ASSERT(be64_to_cpu(d->d_ino_softlimit) <= - be64_to_cpu(d->d_ino_hardlimit)); - if (d->d_rtb_hardlimit) - ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= - be64_to_cpu(d->d_rtb_hardlimit)); -#endif - - if (!d->d_btimer) { - if ((d->d_blk_softlimit && - (be64_to_cpu(d->d_bcount) >= - be64_to_cpu(d->d_blk_softlimit))) || - (d->d_blk_hardlimit && - (be64_to_cpu(d->d_bcount) >= - be64_to_cpu(d->d_blk_hardlimit)))) { - d->d_btimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_btimelimit); - } else { - d->d_bwarns = 0; - } - } else { - if ((!d->d_blk_softlimit || - (be64_to_cpu(d->d_bcount) < - be64_to_cpu(d->d_blk_softlimit))) && - (!d->d_blk_hardlimit || - (be64_to_cpu(d->d_bcount) < - be64_to_cpu(d->d_blk_hardlimit)))) { - d->d_btimer = 0; - } - } - - if (!d->d_itimer) { - if ((d->d_ino_softlimit && - (be64_to_cpu(d->d_icount) >= - be64_to_cpu(d->d_ino_softlimit))) || - (d->d_ino_hardlimit && - (be64_to_cpu(d->d_icount) >= - be64_to_cpu(d->d_ino_hardlimit)))) { - d->d_itimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_itimelimit); - } else { - d->d_iwarns = 0; - } - } else { - if ((!d->d_ino_softlimit || - (be64_to_cpu(d->d_icount) < - be64_to_cpu(d->d_ino_softlimit))) && - (!d->d_ino_hardlimit || - (be64_to_cpu(d->d_icount) < - be64_to_cpu(d->d_ino_hardlimit)))) { - d->d_itimer = 0; - } - } - - if (!d->d_rtbtimer) { - if ((d->d_rtb_softlimit && - (be64_to_cpu(d->d_rtbcount) >= - be64_to_cpu(d->d_rtb_softlimit))) || - (d->d_rtb_hardlimit && - (be64_to_cpu(d->d_rtbcount) >= - be64_to_cpu(d->d_rtb_hardlimit)))) { - d->d_rtbtimer = cpu_to_be32(get_seconds() + - mp->m_quotainfo->qi_rtbtimelimit); - } else { - d->d_rtbwarns = 0; - } - } else { - if ((!d->d_rtb_softlimit || - (be64_to_cpu(d->d_rtbcount) < - be64_to_cpu(d->d_rtb_softlimit))) && - (!d->d_rtb_hardlimit || - (be64_to_cpu(d->d_rtbcount) < - be64_to_cpu(d->d_rtb_hardlimit)))) { - d->d_rtbtimer = 0; - } - } -} - -/* - * initialize a buffer full of dquots and log the whole thing - */ -STATIC void -xfs_qm_init_dquot_blk( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - xfs_buf_t *bp) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - xfs_dqblk_t *d; - int curid, i; - - ASSERT(tp); - ASSERT(xfs_buf_islocked(bp)); - - d = bp->b_addr; - - /* - * ID of the first dquot in the block - id's are zero based. - */ - curid = id - (id % q->qi_dqperchunk); - ASSERT(curid >= 0); - memset(d, 0, BBTOB(q->qi_dqchunklen)); - for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) - xfs_qm_dqinit_core(curid, type, d); - xfs_trans_dquot_buf(tp, bp, - (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : - ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : - XFS_BLF_GDQUOT_BUF))); - xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); -} - - - -/* - * Allocate a block and fill it with dquots. - * This is called when the bmapi finds a hole. - */ -STATIC int -xfs_qm_dqalloc( - xfs_trans_t **tpp, - xfs_mount_t *mp, - xfs_dquot_t *dqp, - xfs_inode_t *quotip, - xfs_fileoff_t offset_fsb, - xfs_buf_t **O_bpp) -{ - xfs_fsblock_t firstblock; - xfs_bmap_free_t flist; - xfs_bmbt_irec_t map; - int nmaps, error, committed; - xfs_buf_t *bp; - xfs_trans_t *tp = *tpp; - - ASSERT(tp != NULL); - - trace_xfs_dqalloc(dqp); - - /* - * Initialize the bmap freelist prior to calling bmapi code. - */ - xfs_bmap_init(&flist, &firstblock); - xfs_ilock(quotip, XFS_ILOCK_EXCL); - /* - * Return if this type of quotas is turned off while we didn't - * have an inode lock - */ - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - xfs_iunlock(quotip, XFS_ILOCK_EXCL); - return (ESRCH); - } - - xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); - nmaps = 1; - if ((error = xfs_bmapi(tp, quotip, - offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, - XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, - &firstblock, - XFS_QM_DQALLOC_SPACE_RES(mp), - &map, &nmaps, &flist))) { - goto error0; - } - ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(nmaps == 1); - ASSERT((map.br_startblock != DELAYSTARTBLOCK) && - (map.br_startblock != HOLESTARTBLOCK)); - - /* - * Keep track of the blkno to save a lookup later - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - - /* now we can just get the buffer (there's nothing to read yet) */ - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, - dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0); - if (!bp || (error = xfs_buf_geterror(bp))) - goto error1; - /* - * Make a chunk of dquots out of this buffer and log - * the entire thing. - */ - xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), - dqp->dq_flags & XFS_DQ_ALLTYPES, bp); - - /* - * xfs_bmap_finish() may commit the current transaction and - * start a second transaction if the freelist is not empty. - * - * Since we still want to modify this buffer, we need to - * ensure that the buffer is not released on commit of - * the first transaction and ensure the buffer is added to the - * second transaction. - * - * If there is only one transaction then don't stop the buffer - * from being released when it commits later on. - */ - - xfs_trans_bhold(tp, bp); - - if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { - goto error1; - } - - if (committed) { - tp = *tpp; - xfs_trans_bjoin(tp, bp); - } else { - xfs_trans_bhold_release(tp, bp); - } - - *O_bpp = bp; - return 0; - - error1: - xfs_bmap_cancel(&flist); - error0: - xfs_iunlock(quotip, XFS_ILOCK_EXCL); - - return (error); -} - -/* - * Maps a dquot to the buffer containing its on-disk version. - * This returns a ptr to the buffer containing the on-disk dquot - * in the bpp param, and a ptr to the on-disk dquot within that buffer - */ -STATIC int -xfs_qm_dqtobp( - xfs_trans_t **tpp, - xfs_dquot_t *dqp, - xfs_disk_dquot_t **O_ddpp, - xfs_buf_t **O_bpp, - uint flags) -{ - xfs_bmbt_irec_t map; - int nmaps = 1, error; - xfs_buf_t *bp; - xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); - xfs_mount_t *mp = dqp->q_mount; - xfs_disk_dquot_t *ddq; - xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); - xfs_trans_t *tp = (tpp ? *tpp : NULL); - - dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; - - xfs_ilock(quotip, XFS_ILOCK_SHARED); - if (XFS_IS_THIS_QUOTA_OFF(dqp)) { - /* - * Return if this type of quotas is turned off while we - * didn't have the quota inode lock. - */ - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - return ESRCH; - } - - /* - * Find the block map; no allocations yet - */ - error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, - XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, - NULL, 0, &map, &nmaps, NULL); - - xfs_iunlock(quotip, XFS_ILOCK_SHARED); - if (error) - return error; - - ASSERT(nmaps == 1); - ASSERT(map.br_blockcount == 1); - - /* - * Offset of dquot in the (fixed sized) dquot chunk. - */ - dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * - sizeof(xfs_dqblk_t); - - ASSERT(map.br_startblock != DELAYSTARTBLOCK); - if (map.br_startblock == HOLESTARTBLOCK) { - /* - * We don't allocate unless we're asked to - */ - if (!(flags & XFS_QMOPT_DQALLOC)) - return ENOENT; - - ASSERT(tp); - error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, - dqp->q_fileoffset, &bp); - if (error) - return error; - tp = *tpp; - } else { - trace_xfs_dqtobp_read(dqp); - - /* - * store the blkno etc so that we don't have to do the - * mapping all the time - */ - dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); - - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, - 0, &bp); - if (error || !bp) - return XFS_ERROR(error); - } - - ASSERT(xfs_buf_islocked(bp)); - - /* - * calculate the location of the dquot inside the buffer. - */ - ddq = bp->b_addr + dqp->q_bufoffset; - - /* - * A simple sanity check in case we got a corrupted dquot... - */ - error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, - flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), - "dqtobp"); - if (error) { - if (!(flags & XFS_QMOPT_DQREPAIR)) { - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EIO); - } - } - - *O_bpp = bp; - *O_ddpp = ddq; - - return (0); -} - - -/* - * Read in the ondisk dquot using dqtobp() then copy it to an incore version, - * and release the buffer immediately. - * - */ -/* ARGSUSED */ -STATIC int -xfs_qm_dqread( - xfs_trans_t **tpp, - xfs_dqid_t id, - xfs_dquot_t *dqp, /* dquot to get filled in */ - uint flags) -{ - xfs_disk_dquot_t *ddqp; - xfs_buf_t *bp; - int error; - xfs_trans_t *tp; - - ASSERT(tpp); - - trace_xfs_dqread(dqp); - - /* - * get a pointer to the on-disk dquot and the buffer containing it - * dqp already knows its own type (GROUP/USER). - */ - if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { - return (error); - } - tp = *tpp; - - /* copy everything from disk dquot to the incore dquot */ - memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); - ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); - xfs_qm_dquot_logitem_init(dqp); - - /* - * Reservation counters are defined as reservation plus current usage - * to avoid having to add every time. - */ - dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); - dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); - dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); - - /* Mark the buf so that this will stay incore a little longer */ - XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); - - /* - * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) - * So we need to release with xfs_trans_brelse(). - * The strategy here is identical to that of inodes; we lock - * the dquot in xfs_qm_dqget() before making it accessible to - * others. This is because dquots, like inodes, need a good level of - * concurrency, and we don't want to take locks on the entire buffers - * for dquot accesses. - * Note also that the dquot buffer may even be dirty at this point, if - * this particular dquot was repaired. We still aren't afraid to - * brelse it because we have the changes incore. - */ - ASSERT(xfs_buf_islocked(bp)); - xfs_trans_brelse(tp, bp); - - return (error); -} - - -/* - * allocate an incore dquot from the kernel heap, - * and fill its core with quota information kept on disk. - * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk - * if it wasn't already allocated. - */ -STATIC int -xfs_qm_idtodq( - xfs_mount_t *mp, - xfs_dqid_t id, /* gid or uid, depending on type */ - uint type, /* UDQUOT or GDQUOT */ - uint flags, /* DQALLOC, DQREPAIR */ - xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ -{ - xfs_dquot_t *dqp; - int error; - xfs_trans_t *tp; - int cancelflags=0; - - dqp = xfs_qm_dqinit(mp, id, type); - tp = NULL; - if (flags & XFS_QMOPT_DQALLOC) { - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); - error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), - XFS_WRITE_LOG_RES(mp) + - BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + - 128, - 0, - XFS_TRANS_PERM_LOG_RES, - XFS_WRITE_LOG_COUNT); - if (error) { - cancelflags = 0; - goto error0; - } - cancelflags = XFS_TRANS_RELEASE_LOG_RES; - } - - /* - * Read it from disk; xfs_dqread() takes care of - * all the necessary initialization of dquot's fields (locks, etc) - */ - if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { - /* - * This can happen if quotas got turned off (ESRCH), - * or if the dquot didn't exist on disk and we ask to - * allocate (ENOENT). - */ - trace_xfs_dqread_fail(dqp); - cancelflags |= XFS_TRANS_ABORT; - goto error0; - } - if (tp) { - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) - goto error1; - } - - *O_dqpp = dqp; - return (0); - - error0: - ASSERT(error); - if (tp) - xfs_trans_cancel(tp, cancelflags); - error1: - xfs_qm_dqdestroy(dqp); - *O_dqpp = NULL; - return (error); -} - -/* - * Lookup a dquot in the incore dquot hashtable. We keep two separate - * hashtables for user and group dquots; and, these are global tables - * inside the XQM, not per-filesystem tables. - * The hash chain must be locked by caller, and it is left locked - * on return. Returning dquot is locked. - */ -STATIC int -xfs_qm_dqlookup( - xfs_mount_t *mp, - xfs_dqid_t id, - xfs_dqhash_t *qh, - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - uint flist_locked; - - ASSERT(mutex_is_locked(&qh->qh_lock)); - - flist_locked = B_FALSE; - - /* - * Traverse the hashchain looking for a match - */ - list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { - /* - * We already have the hashlock. We don't need the - * dqlock to look at the id field of the dquot, since the - * id can't be modified without the hashlock anyway. - */ - if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { - trace_xfs_dqlookup_found(dqp); - - /* - * All in core dquots must be on the dqlist of mp - */ - ASSERT(!list_empty(&dqp->q_mplist)); - - xfs_dqlock(dqp); - if (dqp->q_nrefs == 0) { - ASSERT(!list_empty(&dqp->q_freelist)); - if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { - trace_xfs_dqlookup_want(dqp); - - /* - * We may have raced with dqreclaim_one() - * (and lost). So, flag that we don't - * want the dquot to be reclaimed. - */ - dqp->dq_flags |= XFS_DQ_WANT; - xfs_dqunlock(dqp); - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - xfs_dqlock(dqp); - dqp->dq_flags &= ~(XFS_DQ_WANT); - } - flist_locked = B_TRUE; - } - - /* - * id couldn't have changed; we had the hashlock all - * along - */ - ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); - - if (flist_locked) { - if (dqp->q_nrefs != 0) { - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - flist_locked = B_FALSE; - } else { - /* take it off the freelist */ - trace_xfs_dqlookup_freelist(dqp); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - } - } - - XFS_DQHOLD(dqp); - - if (flist_locked) - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - /* - * move the dquot to the front of the hashchain - */ - ASSERT(mutex_is_locked(&qh->qh_lock)); - list_move(&dqp->q_hashlist, &qh->qh_list); - trace_xfs_dqlookup_done(dqp); - *O_dqpp = dqp; - return 0; - } - } - - *O_dqpp = NULL; - ASSERT(mutex_is_locked(&qh->qh_lock)); - return (1); -} - -/* - * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a - * a locked dquot, doing an allocation (if requested) as needed. - * When both an inode and an id are given, the inode's id takes precedence. - * That is, if the id changes while we don't hold the ilock inside this - * function, the new dquot is returned, not necessarily the one requested - * in the id argument. - */ -int -xfs_qm_dqget( - xfs_mount_t *mp, - xfs_inode_t *ip, /* locked inode (optional) */ - xfs_dqid_t id, /* uid/projid/gid depending on type */ - uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ - uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ - xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ -{ - xfs_dquot_t *dqp; - xfs_dqhash_t *h; - uint version; - int error; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || - (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || - (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { - return (ESRCH); - } - h = XFS_DQ_HASH(mp, id, type); - -#ifdef DEBUG - if (xfs_do_dqerror) { - if ((xfs_dqerror_target == mp->m_ddev_targp) && - (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { - xfs_debug(mp, "Returning error in dqget"); - return (EIO); - } - } -#endif - - again: - -#ifdef DEBUG - ASSERT(type == XFS_DQ_USER || - type == XFS_DQ_PROJ || - type == XFS_DQ_GROUP); - if (ip) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (type == XFS_DQ_USER) - ASSERT(ip->i_udquot == NULL); - else - ASSERT(ip->i_gdquot == NULL); - } -#endif - mutex_lock(&h->qh_lock); - - /* - * Look in the cache (hashtable). - * The chain is kept locked during lookup. - */ - if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { - XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); - /* - * The dquot was found, moved to the front of the chain, - * taken off the freelist if it was on it, and locked - * at this point. Just unlock the hashchain and return. - */ - ASSERT(*O_dqpp); - ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); - mutex_unlock(&h->qh_lock); - trace_xfs_dqget_hit(*O_dqpp); - return (0); /* success */ - } - XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); - - /* - * Dquot cache miss. We don't want to keep the inode lock across - * a (potential) disk read. Also we don't want to deal with the lock - * ordering between quotainode and this inode. OTOH, dropping the inode - * lock here means dealing with a chown that can happen before - * we re-acquire the lock. - */ - if (ip) - xfs_iunlock(ip, XFS_ILOCK_EXCL); - /* - * Save the hashchain version stamp, and unlock the chain, so that - * we don't keep the lock across a disk read - */ - version = h->qh_version; - mutex_unlock(&h->qh_lock); - - /* - * Allocate the dquot on the kernel heap, and read the ondisk - * portion off the disk. Also, do all the necessary initialization - * This can return ENOENT if dquot didn't exist on disk and we didn't - * ask it to allocate; ESRCH if quotas got turned off suddenly. - */ - if ((error = xfs_qm_idtodq(mp, id, type, - flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| - XFS_QMOPT_DOWARN), - &dqp))) { - if (ip) - xfs_ilock(ip, XFS_ILOCK_EXCL); - return (error); - } - - /* - * See if this is mount code calling to look at the overall quota limits - * which are stored in the id == 0 user or group's dquot. - * Since we may not have done a quotacheck by this point, just return - * the dquot without attaching it to any hashtables, lists, etc, or even - * taking a reference. - * The caller must dqdestroy this once done. - */ - if (flags & XFS_QMOPT_DQSUSER) { - ASSERT(id == 0); - ASSERT(! ip); - goto dqret; - } - - /* - * Dquot lock comes after hashlock in the lock ordering - */ - if (ip) { - xfs_ilock(ip, XFS_ILOCK_EXCL); - - /* - * A dquot could be attached to this inode by now, since - * we had dropped the ilock. - */ - if (type == XFS_DQ_USER) { - if (!XFS_IS_UQUOTA_ON(mp)) { - /* inode stays locked on return */ - xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); - } - if (ip->i_udquot) { - xfs_qm_dqdestroy(dqp); - dqp = ip->i_udquot; - xfs_dqlock(dqp); - goto dqret; - } - } else { - if (!XFS_IS_OQUOTA_ON(mp)) { - /* inode stays locked on return */ - xfs_qm_dqdestroy(dqp); - return XFS_ERROR(ESRCH); - } - if (ip->i_gdquot) { - xfs_qm_dqdestroy(dqp); - dqp = ip->i_gdquot; - xfs_dqlock(dqp); - goto dqret; - } - } - } - - /* - * Hashlock comes after ilock in lock order - */ - mutex_lock(&h->qh_lock); - if (version != h->qh_version) { - xfs_dquot_t *tmpdqp; - /* - * Now, see if somebody else put the dquot in the - * hashtable before us. This can happen because we didn't - * keep the hashchain lock. We don't have to worry about - * lock order between the two dquots here since dqp isn't - * on any findable lists yet. - */ - if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { - /* - * Duplicate found. Just throw away the new dquot - * and start over. - */ - xfs_qm_dqput(tmpdqp); - mutex_unlock(&h->qh_lock); - xfs_qm_dqdestroy(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); - goto again; - } - } - - /* - * Put the dquot at the beginning of the hash-chain and mp's list - * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. - */ - ASSERT(mutex_is_locked(&h->qh_lock)); - dqp->q_hash = h; - list_add(&dqp->q_hashlist, &h->qh_list); - h->qh_version++; - - /* - * Attach this dquot to this filesystem's list of all dquots, - * kept inside the mount structure in m_quotainfo field - */ - mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); - - /* - * We return a locked dquot to the caller, with a reference taken - */ - xfs_dqlock(dqp); - dqp->q_nrefs = 1; - - list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); - mp->m_quotainfo->qi_dquots++; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); - mutex_unlock(&h->qh_lock); - dqret: - ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); - trace_xfs_dqget_miss(dqp); - *O_dqpp = dqp; - return (0); -} - - -/* - * Release a reference to the dquot (decrement ref-count) - * and unlock it. If there is a group quota attached to this - * dquot, carefully release that too without tripping over - * deadlocks'n'stuff. - */ -void -xfs_qm_dqput( - xfs_dquot_t *dqp) -{ - xfs_dquot_t *gdqp; - - ASSERT(dqp->q_nrefs > 0); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - trace_xfs_dqput(dqp); - - if (dqp->q_nrefs != 1) { - dqp->q_nrefs--; - xfs_dqunlock(dqp); - return; - } - - /* - * drop the dqlock and acquire the freelist and dqlock - * in the right order; but try to get it out-of-order first - */ - if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { - trace_xfs_dqput_wait(dqp); - xfs_dqunlock(dqp); - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - xfs_dqlock(dqp); - } - - while (1) { - gdqp = NULL; - - /* We can't depend on nrefs being == 1 here */ - if (--dqp->q_nrefs == 0) { - trace_xfs_dqput_free(dqp); - - list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); - xfs_Gqm->qm_dqfrlist_cnt++; - - /* - * If we just added a udquot to the freelist, then - * we want to release the gdquot reference that - * it (probably) has. Otherwise it'll keep the - * gdquot from getting reclaimed. - */ - if ((gdqp = dqp->q_gdquot)) { - /* - * Avoid a recursive dqput call - */ - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - } - xfs_dqunlock(dqp); - - /* - * If we had a group quota inside the user quota as a hint, - * release it now. - */ - if (! gdqp) - break; - dqp = gdqp; - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); -} - -/* - * Release a dquot. Flush it if dirty, then dqput() it. - * dquot must not be locked. - */ -void -xfs_qm_dqrele( - xfs_dquot_t *dqp) -{ - if (!dqp) - return; - - trace_xfs_dqrele(dqp); - - xfs_dqlock(dqp); - /* - * We don't care to flush it if the dquot is dirty here. - * That will create stutters that we want to avoid. - * Instead we do a delayed write when we try to reclaim - * a dirty dquot. Also xfs_sync will take part of the burden... - */ - xfs_qm_dqput(dqp); -} - -/* - * This is the dquot flushing I/O completion routine. It is called - * from interrupt level when the buffer containing the dquot is - * flushed to disk. It is responsible for removing the dquot logitem - * from the AIL if it has not been re-logged, and unlocking the dquot's - * flush lock. This behavior is very similar to that of inodes.. - */ -STATIC void -xfs_qm_dqflush_done( - struct xfs_buf *bp, - struct xfs_log_item *lip) -{ - xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; - xfs_dquot_t *dqp = qip->qli_dquot; - struct xfs_ail *ailp = lip->li_ailp; - - /* - * We only want to pull the item from the AIL if its - * location in the log has not changed since we started the flush. - * Thus, we only bother if the dquot's lsn has - * not changed. First we check the lsn outside the lock - * since it's cheaper, and then we recheck while - * holding the lock before removing the dquot from the AIL. - */ - if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { - - /* xfs_trans_ail_delete() drops the AIL lock. */ - spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) - xfs_trans_ail_delete(ailp, lip); - else - spin_unlock(&ailp->xa_lock); - } - - /* - * Release the dq's flush lock since we're done with it. - */ - xfs_dqfunlock(dqp); -} - -/* - * Write a modified dquot to disk. - * The dquot must be locked and the flush lock too taken by caller. - * The flush lock will not be unlocked until the dquot reaches the disk, - * but the dquot is free to be unlocked and modified by the caller - * in the interim. Dquot is still locked on return. This behavior is - * identical to that of inodes. - */ -int -xfs_qm_dqflush( - xfs_dquot_t *dqp, - uint flags) -{ - struct xfs_mount *mp = dqp->q_mount; - struct xfs_buf *bp; - struct xfs_disk_dquot *ddqp; - int error; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(!completion_done(&dqp->q_flush)); - - trace_xfs_dqflush(dqp); - - /* - * If not dirty, or it's pinned and we are not supposed to block, nada. - */ - if (!XFS_DQ_IS_DIRTY(dqp) || - (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { - xfs_dqfunlock(dqp); - return 0; - } - xfs_qm_dqunpin_wait(dqp); - - /* - * This may have been unpinned because the filesystem is shutting - * down forcibly. If that's the case we must not write this dquot - * to disk, because the log record didn't make it to disk! - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - dqp->dq_flags &= ~XFS_DQ_DIRTY; - xfs_dqfunlock(dqp); - return XFS_ERROR(EIO); - } - - /* - * Get the buffer containing the on-disk dquot - */ - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) { - ASSERT(error != ENOENT); - xfs_dqfunlock(dqp); - return error; - } - - /* - * Calculate the location of the dquot inside the buffer. - */ - ddqp = bp->b_addr + dqp->q_bufoffset; - - /* - * A simple sanity check in case we got a corrupted dquot.. - */ - error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, - XFS_QMOPT_DOWARN, "dqflush (incore copy)"); - if (error) { - xfs_buf_relse(bp); - xfs_dqfunlock(dqp); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - return XFS_ERROR(EIO); - } - - /* This is the only portion of data that needs to persist */ - memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); - - /* - * Clear the dirty field and remember the flush lsn for later use. - */ - dqp->dq_flags &= ~XFS_DQ_DIRTY; - - xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, - &dqp->q_logitem.qli_item.li_lsn); - - /* - * Attach an iodone routine so that we can remove this dquot from the - * AIL and release the flush lock once the dquot is synced to disk. - */ - xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, - &dqp->q_logitem.qli_item); - - /* - * If the buffer is pinned then push on the log so we won't - * get stuck waiting in the write for too long. - */ - if (xfs_buf_ispinned(bp)) { - trace_xfs_dqflush_force(dqp); - xfs_log_force(mp, 0); - } - - if (flags & SYNC_WAIT) - error = xfs_bwrite(mp, bp); - else - xfs_bdwrite(mp, bp); - - trace_xfs_dqflush_done(dqp); - - /* - * dqp is still locked, but caller is free to unlock it now. - */ - return error; - -} - -int -xfs_qm_dqlock_nowait( - xfs_dquot_t *dqp) -{ - return mutex_trylock(&dqp->q_qlock); -} - -void -xfs_dqlock( - xfs_dquot_t *dqp) -{ - mutex_lock(&dqp->q_qlock); -} - -void -xfs_dqunlock( - xfs_dquot_t *dqp) -{ - mutex_unlock(&(dqp->q_qlock)); - if (dqp->q_logitem.qli_dquot == dqp) { - /* Once was dqp->q_mount, but might just have been cleared */ - xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, - (xfs_log_item_t*)&(dqp->q_logitem)); - } -} - - -void -xfs_dqunlock_nonotify( - xfs_dquot_t *dqp) -{ - mutex_unlock(&(dqp->q_qlock)); -} - -/* - * Lock two xfs_dquot structures. - * - * To avoid deadlocks we always lock the quota structure with - * the lowerd id first. - */ -void -xfs_dqlock2( - xfs_dquot_t *d1, - xfs_dquot_t *d2) -{ - if (d1 && d2) { - ASSERT(d1 != d2); - if (be32_to_cpu(d1->q_core.d_id) > - be32_to_cpu(d2->q_core.d_id)) { - mutex_lock(&d2->q_qlock); - mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); - } else { - mutex_lock(&d1->q_qlock); - mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); - } - } else if (d1) { - mutex_lock(&d1->q_qlock); - } else if (d2) { - mutex_lock(&d2->q_qlock); - } -} - - -/* - * Take a dquot out of the mount's dqlist as well as the hashlist. - * This is called via unmount as well as quotaoff, and the purge - * will always succeed unless there are soft (temp) references - * outstanding. - * - * This returns 0 if it was purged, 1 if it wasn't. It's not an error code - * that we're returning! XXXsup - not cool. - */ -/* ARGSUSED */ -int -xfs_qm_dqpurge( - xfs_dquot_t *dqp) -{ - xfs_dqhash_t *qh = dqp->q_hash; - xfs_mount_t *mp = dqp->q_mount; - - ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); - ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); - - xfs_dqlock(dqp); - /* - * We really can't afford to purge a dquot that is - * referenced, because these are hard refs. - * It shouldn't happen in general because we went thru _all_ inodes in - * dqrele_all_inodes before calling this and didn't let the mountlock go. - * However it is possible that we have dquots with temporary - * references that are not attached to an inode. e.g. see xfs_setattr(). - */ - if (dqp->q_nrefs != 0) { - xfs_dqunlock(dqp); - mutex_unlock(&dqp->q_hash->qh_lock); - return (1); - } - - ASSERT(!list_empty(&dqp->q_freelist)); - - /* - * If we're turning off quotas, we have to make sure that, for - * example, we don't delete quota disk blocks while dquots are - * in the process of getting written to those disk blocks. - * This dquot might well be on AIL, and we can't leave it there - * if we're turning off quotas. Basically, we need this flush - * lock, and are willing to block on it. - */ - if (!xfs_dqflock_nowait(dqp)) { - /* - * Block on the flush lock after nudging dquot buffer, - * if it is incore. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - - /* - * XXXIf we're turning this type of quotas off, we don't care - * about the dirty metadata sitting in this dquot. OTOH, if - * we're unmounting, we do care, so we flush it and wait. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - /* dqflush unlocks dqflock */ - /* - * Given that dqpurge is a very rare occurrence, it is OK - * that we're holding the hashlist and mplist locks - * across the disk write. But, ... XXXsup - * - * We don't care about getting disk errors here. We need - * to purge this dquot anyway, so we go ahead regardless. - */ - error = xfs_qm_dqflush(dqp, SYNC_WAIT); - if (error) - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - xfs_dqflock(dqp); - } - ASSERT(atomic_read(&dqp->q_pincount) == 0); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || - !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); - - list_del_init(&dqp->q_hashlist); - qh->qh_version++; - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dqreclaims++; - mp->m_quotainfo->qi_dquots--; - /* - * XXX Move this to the front of the freelist, if we can get the - * freelist lock. - */ - ASSERT(!list_empty(&dqp->q_freelist)); - - dqp->q_mount = NULL; - dqp->q_hash = NULL; - dqp->dq_flags = XFS_DQ_INACTIVE; - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - xfs_dqfunlock(dqp); - xfs_dqunlock(dqp); - mutex_unlock(&qh->qh_lock); - return (0); -} - - -/* - * Give the buffer a little push if it is incore and - * wait on the flush lock. - */ -void -xfs_qm_dqflock_pushbuf_wait( - xfs_dquot_t *dqp) -{ - xfs_mount_t *mp = dqp->q_mount; - xfs_buf_t *bp; - - /* - * Check to see if the dquot has been flushed delayed - * write. If so, grab its buffer and send it - * out immediately. We'll be able to acquire - * the flush lock when the I/O completes. - */ - bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); - if (!bp) - goto out_lock; - - if (XFS_BUF_ISDELAYWRITE(bp)) { - if (xfs_buf_ispinned(bp)) - xfs_log_force(mp, 0); - xfs_buf_delwri_promote(bp); - wake_up_process(bp->b_target->bt_task); - } - xfs_buf_relse(bp); -out_lock: - xfs_dqflock(dqp); -} diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h deleted file mode 100644 index 34b7e945dbfa..000000000000 --- a/fs/xfs/quota/xfs_dquot.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DQUOT_H__ -#define __XFS_DQUOT_H__ - -/* - * Dquots are structures that hold quota information about a user or a group, - * much like inodes are for files. In fact, dquots share many characteristics - * with inodes. However, dquots can also be a centralized resource, relative - * to a collection of inodes. In this respect, dquots share some characteristics - * of the superblock. - * XFS dquots exploit both those in its algorithms. They make every attempt - * to not be a bottleneck when quotas are on and have minimal impact, if any, - * when quotas are off. - */ - -/* - * The hash chain headers (hash buckets) - */ -typedef struct xfs_dqhash { - struct list_head qh_list; - struct mutex qh_lock; - uint qh_version; /* ever increasing version */ - uint qh_nelems; /* number of dquots on the list */ -} xfs_dqhash_t; - -struct xfs_mount; -struct xfs_trans; - -/* - * The incore dquot structure - */ -typedef struct xfs_dquot { - uint dq_flags; /* various flags (XFS_DQ_*) */ - struct list_head q_freelist; /* global free list of dquots */ - struct list_head q_mplist; /* mount's list of dquots */ - struct list_head q_hashlist; /* gloabl hash list of dquots */ - xfs_dqhash_t *q_hash; /* the hashchain header */ - struct xfs_mount*q_mount; /* filesystem this relates to */ - struct xfs_trans*q_transp; /* trans this belongs to currently */ - uint q_nrefs; /* # active refs from inodes */ - xfs_daddr_t q_blkno; /* blkno of dquot buffer */ - int q_bufoffset; /* off of dq in buffer (# dquots) */ - xfs_fileoff_t q_fileoffset; /* offset in quotas file */ - - struct xfs_dquot*q_gdquot; /* group dquot, hint only */ - xfs_disk_dquot_t q_core; /* actual usage & quotas */ - xfs_dq_logitem_t q_logitem; /* dquot log item */ - xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ - xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ - xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ - struct mutex q_qlock; /* quota lock */ - struct completion q_flush; /* flush completion queue */ - atomic_t q_pincount; /* dquot pin count */ - wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ -} xfs_dquot_t; - -/* - * Lock hierarchy for q_qlock: - * XFS_QLOCK_NORMAL is the implicit default, - * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 - */ -enum { - XFS_QLOCK_NORMAL = 0, - XFS_QLOCK_NESTED, -}; - -#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) - -/* - * Manage the q_flush completion queue embedded in the dquot. This completion - * queue synchronizes processes attempting to flush the in-core dquot back to - * disk. - */ -static inline void xfs_dqflock(xfs_dquot_t *dqp) -{ - wait_for_completion(&dqp->q_flush); -} - -static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) -{ - return try_wait_for_completion(&dqp->q_flush); -} - -static inline void xfs_dqfunlock(xfs_dquot_t *dqp) -{ - complete(&dqp->q_flush); -} - -#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) -#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) -#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) -#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) -#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) -#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) -#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ - XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ - XFS_DQ_TO_QINF(dqp)->qi_gquotaip) - -#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ - (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ - (XFS_IS_OQUOTA_ON((d)->q_mount)))) - -extern void xfs_qm_dqdestroy(xfs_dquot_t *); -extern int xfs_qm_dqflush(xfs_dquot_t *, uint); -extern int xfs_qm_dqpurge(xfs_dquot_t *); -extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); -extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); -extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); -extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, - xfs_disk_dquot_t *); -extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, - xfs_disk_dquot_t *); -extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, - xfs_dqid_t, uint, uint, xfs_dquot_t **); -extern void xfs_qm_dqput(xfs_dquot_t *); -extern void xfs_dqlock(xfs_dquot_t *); -extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); -extern void xfs_dqunlock(xfs_dquot_t *); -extern void xfs_dqunlock_nonotify(xfs_dquot_t *); - -#endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c deleted file mode 100644 index 9e0e2fa3f2c8..000000000000 --- a/fs/xfs/quota/xfs_dquot_item.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" - -static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) -{ - return container_of(lip, struct xfs_dq_logitem, qli_item); -} - -/* - * returns the number of iovecs needed to log the given dquot item. - */ -STATIC uint -xfs_qm_dquot_logitem_size( - struct xfs_log_item *lip) -{ - /* - * we need only two iovecs, one for the format, one for the real thing - */ - return 2; -} - -/* - * fills in the vector of log iovecs for the given dquot log item. - */ -STATIC void -xfs_qm_dquot_logitem_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *logvec) -{ - struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - - logvec->i_addr = &qlip->qli_format; - logvec->i_len = sizeof(xfs_dq_logformat_t); - logvec->i_type = XLOG_REG_TYPE_QFORMAT; - logvec++; - logvec->i_addr = &qlip->qli_dquot->q_core; - logvec->i_len = sizeof(xfs_disk_dquot_t); - logvec->i_type = XLOG_REG_TYPE_DQUOT; - - ASSERT(2 == lip->li_desc->lid_size); - qlip->qli_format.qlf_size = 2; - -} - -/* - * Increment the pin count of the given dquot. - */ -STATIC void -xfs_qm_dquot_logitem_pin( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - atomic_inc(&dqp->q_pincount); -} - -/* - * Decrement the pin count of the given dquot, and wake up - * anyone in xfs_dqwait_unpin() if the count goes to 0. The - * dquot must have been previously pinned with a call to - * xfs_qm_dquot_logitem_pin(). - */ -STATIC void -xfs_qm_dquot_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(atomic_read(&dqp->q_pincount) > 0); - if (atomic_dec_and_test(&dqp->q_pincount)) - wake_up(&dqp->q_pinwait); -} - -/* - * Given the logitem, this writes the corresponding dquot entry to disk - * asynchronously. This is called with the dquot entry securely locked; - * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot - * at the end. - */ -STATIC void -xfs_qm_dquot_logitem_push( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - int error; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(!completion_done(&dqp->q_flush)); - - /* - * Since we were able to lock the dquot's flush lock and - * we found it on the AIL, the dquot must be dirty. This - * is because the dquot is removed from the AIL while still - * holding the flush lock in xfs_dqflush_done(). Thus, if - * we found it in the AIL and were able to obtain the flush - * lock without sleeping, then there must not have been - * anyone in the process of flushing the dquot. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) - xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", - __func__, error, dqp); - xfs_dqunlock(dqp); -} - -STATIC xfs_lsn_t -xfs_qm_dquot_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - /* - * We always re-log the entire dquot when it becomes dirty, - * so, the latest copy _is_ the only one that matters. - */ - return lsn; -} - -/* - * This is called to wait for the given dquot to be unpinned. - * Most of these pin/unpin routines are plagiarized from inode code. - */ -void -xfs_qm_dqunpin_wait( - struct xfs_dquot *dqp) -{ - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - if (atomic_read(&dqp->q_pincount) == 0) - return; - - /* - * Give the log a push so we don't wait here too long. - */ - xfs_log_force(dqp->q_mount, 0); - wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); -} - -/* - * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that - * the dquot is locked by us, but the flush lock isn't. So, here we are - * going to see if the relevant dquot buffer is incore, waiting on DELWRI. - * If so, we want to push it out to help us take this item off the AIL as soon - * as possible. - * - * We must not be holding the AIL lock at this point. Calling incore() to - * search the buffer cache can be a time consuming thing, and AIL lock is a - * spinlock. - */ -STATIC void -xfs_qm_dquot_logitem_pushbuf( - struct xfs_log_item *lip) -{ - struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); - struct xfs_dquot *dqp = qlip->qli_dquot; - struct xfs_buf *bp; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - /* - * If flushlock isn't locked anymore, chances are that the - * inode flush completed and the inode was taken off the AIL. - * So, just get out. - */ - if (completion_done(&dqp->q_flush) || - !(lip->li_flags & XFS_LI_IN_AIL)) { - xfs_dqunlock(dqp); - return; - } - - bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, - dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); - xfs_dqunlock(dqp); - if (!bp) - return; - if (XFS_BUF_ISDELAYWRITE(bp)) - xfs_buf_delwri_promote(bp); - xfs_buf_relse(bp); -} - -/* - * This is called to attempt to lock the dquot associated with this - * dquot log item. Don't sleep on the dquot lock or the flush lock. - * If the flush lock is already held, indicating that the dquot has - * been or is in the process of being flushed, then see if we can - * find the dquot's buffer in the buffer cache without sleeping. If - * we can and it is marked delayed write, then we want to send it out. - * We delay doing so until the push routine, though, to avoid sleeping - * in any device strategy routines. - */ -STATIC uint -xfs_qm_dquot_logitem_trylock( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - if (atomic_read(&dqp->q_pincount) > 0) - return XFS_ITEM_PINNED; - - if (!xfs_qm_dqlock_nowait(dqp)) - return XFS_ITEM_LOCKED; - - if (!xfs_dqflock_nowait(dqp)) { - /* - * dquot has already been flushed to the backing buffer, - * leave it locked, pushbuf routine will unlock it. - */ - return XFS_ITEM_PUSHBUF; - } - - ASSERT(lip->li_flags & XFS_LI_IN_AIL); - return XFS_ITEM_SUCCESS; -} - -/* - * Unlock the dquot associated with the log item. - * Clear the fields of the dquot and dquot log item that - * are specific to the current transaction. If the - * hold flags is set, do not unlock the dquot. - */ -STATIC void -xfs_qm_dquot_logitem_unlock( - struct xfs_log_item *lip) -{ - struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - /* - * Clear the transaction pointer in the dquot - */ - dqp->q_transp = NULL; - - /* - * dquots are never 'held' from getting unlocked at the end of - * a transaction. Their locking and unlocking is hidden inside the - * transaction layer, within trans_commit. Hence, no LI_HOLD flag - * for the logitem. - */ - xfs_dqunlock(dqp); -} - -/* - * this needs to stamp an lsn into the dquot, I think. - * rpc's that look at user dquot's would then have to - * push on the dependency recorded in the dquot - */ -STATIC void -xfs_qm_dquot_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ -} - -/* - * This is the ops vector for dquots - */ -static struct xfs_item_ops xfs_dquot_item_ops = { - .iop_size = xfs_qm_dquot_logitem_size, - .iop_format = xfs_qm_dquot_logitem_format, - .iop_pin = xfs_qm_dquot_logitem_pin, - .iop_unpin = xfs_qm_dquot_logitem_unpin, - .iop_trylock = xfs_qm_dquot_logitem_trylock, - .iop_unlock = xfs_qm_dquot_logitem_unlock, - .iop_committed = xfs_qm_dquot_logitem_committed, - .iop_push = xfs_qm_dquot_logitem_push, - .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, - .iop_committing = xfs_qm_dquot_logitem_committing -}; - -/* - * Initialize the dquot log item for a newly allocated dquot. - * The dquot isn't locked at this point, but it isn't on any of the lists - * either, so we don't care. - */ -void -xfs_qm_dquot_logitem_init( - struct xfs_dquot *dqp) -{ - struct xfs_dq_logitem *lp = &dqp->q_logitem; - - xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, - &xfs_dquot_item_ops); - lp->qli_dquot = dqp; - lp->qli_format.qlf_type = XFS_LI_DQUOT; - lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); - lp->qli_format.qlf_blkno = dqp->q_blkno; - lp->qli_format.qlf_len = 1; - /* - * This is just the offset of this dquot within its buffer - * (which is currently 1 FSB and probably won't change). - * Hence 32 bits for this offset should be just fine. - * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) - * here, and recompute it at recovery time. - */ - lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; -} - -/*------------------ QUOTAOFF LOG ITEMS -------------------*/ - -static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) -{ - return container_of(lip, struct xfs_qoff_logitem, qql_item); -} - - -/* - * This returns the number of iovecs needed to log the given quotaoff item. - * We only need 1 iovec for an quotaoff item. It just logs the - * quotaoff_log_format structure. - */ -STATIC uint -xfs_qm_qoff_logitem_size( - struct xfs_log_item *lip) -{ - return 1; -} - -/* - * This is called to fill in the vector of log iovecs for the - * given quotaoff log item. We use only 1 iovec, and we point that - * at the quotaoff_log_format structure embedded in the quotaoff item. - * It is at this point that we assert that all of the extent - * slots in the quotaoff item have been filled. - */ -STATIC void -xfs_qm_qoff_logitem_format( - struct xfs_log_item *lip, - struct xfs_log_iovec *log_vector) -{ - struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); - - ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - - log_vector->i_addr = &qflip->qql_format; - log_vector->i_len = sizeof(xfs_qoff_logitem_t); - log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; - qflip->qql_format.qf_size = 1; -} - -/* - * Pinning has no meaning for an quotaoff item, so just return. - */ -STATIC void -xfs_qm_qoff_logitem_pin( - struct xfs_log_item *lip) -{ -} - -/* - * Since pinning has no meaning for an quotaoff item, unpinning does - * not either. - */ -STATIC void -xfs_qm_qoff_logitem_unpin( - struct xfs_log_item *lip, - int remove) -{ -} - -/* - * Quotaoff items have no locking, so just return success. - */ -STATIC uint -xfs_qm_qoff_logitem_trylock( - struct xfs_log_item *lip) -{ - return XFS_ITEM_LOCKED; -} - -/* - * Quotaoff items have no locking or pushing, so return failure - * so that the caller doesn't bother with us. - */ -STATIC void -xfs_qm_qoff_logitem_unlock( - struct xfs_log_item *lip) -{ -} - -/* - * The quotaoff-start-item is logged only once and cannot be moved in the log, - * so simply return the lsn at which it's been logged. - */ -STATIC xfs_lsn_t -xfs_qm_qoff_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - return lsn; -} - -/* - * There isn't much you can do to push on an quotaoff item. It is simply - * stuck waiting for the log to be flushed to disk. - */ -STATIC void -xfs_qm_qoff_logitem_push( - struct xfs_log_item *lip) -{ -} - - -STATIC xfs_lsn_t -xfs_qm_qoffend_logitem_committed( - struct xfs_log_item *lip, - xfs_lsn_t lsn) -{ - struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); - struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; - struct xfs_ail *ailp = qfs->qql_item.li_ailp; - - /* - * Delete the qoff-start logitem from the AIL. - * xfs_trans_ail_delete() drops the AIL lock. - */ - spin_lock(&ailp->xa_lock); - xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); - - kmem_free(qfs); - kmem_free(qfe); - return (xfs_lsn_t)-1; -} - -/* - * XXX rcc - don't know quite what to do with this. I think we can - * just ignore it. The only time that isn't the case is if we allow - * the client to somehow see that quotas have been turned off in which - * we can't allow that to get back until the quotaoff hits the disk. - * So how would that happen? Also, do we need different routines for - * quotaoff start and quotaoff end? I suspect the answer is yes but - * to be sure, I need to look at the recovery code and see how quota off - * recovery is handled (do we roll forward or back or do something else). - * If we roll forwards or backwards, then we need two separate routines, - * one that does nothing and one that stamps in the lsn that matters - * (truly makes the quotaoff irrevocable). If we do something else, - * then maybe we don't need two. - */ -STATIC void -xfs_qm_qoff_logitem_committing( - struct xfs_log_item *lip, - xfs_lsn_t commit_lsn) -{ -} - -static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_trylock = xfs_qm_qoff_logitem_trylock, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoffend_logitem_committed, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing -}; - -/* - * This is the ops vector shared by all quotaoff-start log items. - */ -static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { - .iop_size = xfs_qm_qoff_logitem_size, - .iop_format = xfs_qm_qoff_logitem_format, - .iop_pin = xfs_qm_qoff_logitem_pin, - .iop_unpin = xfs_qm_qoff_logitem_unpin, - .iop_trylock = xfs_qm_qoff_logitem_trylock, - .iop_unlock = xfs_qm_qoff_logitem_unlock, - .iop_committed = xfs_qm_qoff_logitem_committed, - .iop_push = xfs_qm_qoff_logitem_push, - .iop_committing = xfs_qm_qoff_logitem_committing -}; - -/* - * Allocate and initialize an quotaoff item of the correct quota type(s). - */ -struct xfs_qoff_logitem * -xfs_qm_qoff_logitem_init( - struct xfs_mount *mp, - struct xfs_qoff_logitem *start, - uint flags) -{ - struct xfs_qoff_logitem *qf; - - qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); - - xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? - &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); - qf->qql_item.li_mountp = mp; - qf->qql_format.qf_type = XFS_LI_QUOTAOFF; - qf->qql_format.qf_flags = flags; - qf->qql_start_lip = start; - return qf; -} diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h deleted file mode 100644 index 5acae2ada70b..000000000000 --- a/fs/xfs/quota/xfs_dquot_item.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_DQUOT_ITEM_H__ -#define __XFS_DQUOT_ITEM_H__ - -struct xfs_dquot; -struct xfs_trans; -struct xfs_mount; -struct xfs_qoff_logitem; - -typedef struct xfs_dq_logitem { - xfs_log_item_t qli_item; /* common portion */ - struct xfs_dquot *qli_dquot; /* dquot ptr */ - xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ - xfs_dq_logformat_t qli_format; /* logged structure */ -} xfs_dq_logitem_t; - -typedef struct xfs_qoff_logitem { - xfs_log_item_t qql_item; /* common portion */ - struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ - xfs_qoff_logformat_t qql_format; /* logged structure */ -} xfs_qoff_logitem_t; - - -extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); -extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, - struct xfs_qoff_logitem *, uint); -extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *, uint); -extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, - struct xfs_qoff_logitem *); - -#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c deleted file mode 100644 index 9a0aa76facdf..000000000000 --- a/fs/xfs/quota/xfs_qm.c +++ /dev/null @@ -1,2416 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" -#include "xfs_ialloc.h" -#include "xfs_itable.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_space.h" -#include "xfs_utils.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - -/* - * The global quota manager. There is only one of these for the entire - * system, _not_ one per file system. XQM keeps track of the overall - * quota functionality, including maintaining the freelist and hash - * tables of dquots. - */ -struct mutex xfs_Gqm_lock; -struct xfs_qm *xfs_Gqm; -uint ndquot; - -kmem_zone_t *qm_dqzone; -kmem_zone_t *qm_dqtrxzone; - -STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); -STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); - -STATIC int xfs_qm_init_quotainos(xfs_mount_t *); -STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); -STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); - -static struct shrinker xfs_qm_shaker = { - .shrink = xfs_qm_shake, - .seeks = DEFAULT_SEEKS, -}; - -/* - * Initialize the XQM structure. - * Note that there is not one quota manager per file system. - */ -STATIC struct xfs_qm * -xfs_Gqm_init(void) -{ - xfs_dqhash_t *udqhash, *gdqhash; - xfs_qm_t *xqm; - size_t hsize; - uint i; - - /* - * Initialize the dquot hash tables. - */ - udqhash = kmem_zalloc_greedy(&hsize, - XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), - XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); - if (!udqhash) - goto out; - - gdqhash = kmem_zalloc_large(hsize); - if (!gdqhash) - goto out_free_udqhash; - - hsize /= sizeof(xfs_dqhash_t); - ndquot = hsize << 8; - - xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); - xqm->qm_dqhashmask = hsize - 1; - xqm->qm_usr_dqhtable = udqhash; - xqm->qm_grp_dqhtable = gdqhash; - ASSERT(xqm->qm_usr_dqhtable != NULL); - ASSERT(xqm->qm_grp_dqhtable != NULL); - - for (i = 0; i < hsize; i++) { - xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); - xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); - } - - /* - * Freelist of all dquots of all file systems - */ - INIT_LIST_HEAD(&xqm->qm_dqfrlist); - xqm->qm_dqfrlist_cnt = 0; - mutex_init(&xqm->qm_dqfrlist_lock); - - /* - * dquot zone. we register our own low-memory callback. - */ - if (!qm_dqzone) { - xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), - "xfs_dquots"); - qm_dqzone = xqm->qm_dqzone; - } else - xqm->qm_dqzone = qm_dqzone; - - register_shrinker(&xfs_qm_shaker); - - /* - * The t_dqinfo portion of transactions. - */ - if (!qm_dqtrxzone) { - xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), - "xfs_dqtrx"); - qm_dqtrxzone = xqm->qm_dqtrxzone; - } else - xqm->qm_dqtrxzone = qm_dqtrxzone; - - atomic_set(&xqm->qm_totaldquots, 0); - xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; - xqm->qm_nrefs = 0; - return xqm; - - out_free_udqhash: - kmem_free_large(udqhash); - out: - return NULL; -} - -/* - * Destroy the global quota manager when its reference count goes to zero. - */ -STATIC void -xfs_qm_destroy( - struct xfs_qm *xqm) -{ - struct xfs_dquot *dqp, *n; - int hsize, i; - - ASSERT(xqm != NULL); - ASSERT(xqm->qm_nrefs == 0); - unregister_shrinker(&xfs_qm_shaker); - hsize = xqm->qm_dqhashmask + 1; - for (i = 0; i < hsize; i++) { - xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); - xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); - } - kmem_free_large(xqm->qm_usr_dqhtable); - kmem_free_large(xqm->qm_grp_dqhtable); - xqm->qm_usr_dqhtable = NULL; - xqm->qm_grp_dqhtable = NULL; - xqm->qm_dqhashmask = 0; - - /* frlist cleanup */ - mutex_lock(&xqm->qm_dqfrlist_lock); - list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { - xfs_dqlock(dqp); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - xfs_dqunlock(dqp); - xfs_qm_dqdestroy(dqp); - } - mutex_unlock(&xqm->qm_dqfrlist_lock); - mutex_destroy(&xqm->qm_dqfrlist_lock); - kmem_free(xqm); -} - -/* - * Called at mount time to let XQM know that another file system is - * starting quotas. This isn't crucial information as the individual mount - * structures are pretty independent, but it helps the XQM keep a - * global view of what's going on. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_hold_quotafs_ref( - struct xfs_mount *mp) -{ - /* - * Need to lock the xfs_Gqm structure for things like this. For example, - * the structure could disappear between the entry to this routine and - * a HOLD operation if not locked. - */ - mutex_lock(&xfs_Gqm_lock); - - if (!xfs_Gqm) { - xfs_Gqm = xfs_Gqm_init(); - if (!xfs_Gqm) { - mutex_unlock(&xfs_Gqm_lock); - return ENOMEM; - } - } - - /* - * We can keep a list of all filesystems with quotas mounted for - * debugging and statistical purposes, but ... - * Just take a reference and get out. - */ - xfs_Gqm->qm_nrefs++; - mutex_unlock(&xfs_Gqm_lock); - - return 0; -} - - -/* - * Release the reference that a filesystem took at mount time, - * so that we know when we need to destroy the entire quota manager. - */ -/* ARGSUSED */ -STATIC void -xfs_qm_rele_quotafs_ref( - struct xfs_mount *mp) -{ - xfs_dquot_t *dqp, *n; - - ASSERT(xfs_Gqm); - ASSERT(xfs_Gqm->qm_nrefs > 0); - - /* - * Go thru the freelist and destroy all inactive dquots. - */ - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - - list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { - xfs_dqlock(dqp); - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(dqp->q_mount == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(list_empty(&dqp->q_hashlist)); - ASSERT(list_empty(&dqp->q_mplist)); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - xfs_dqunlock(dqp); - xfs_qm_dqdestroy(dqp); - } else { - xfs_dqunlock(dqp); - } - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - - /* - * Destroy the entire XQM. If somebody mounts with quotaon, this'll - * be restarted. - */ - mutex_lock(&xfs_Gqm_lock); - if (--xfs_Gqm->qm_nrefs == 0) { - xfs_qm_destroy(xfs_Gqm); - xfs_Gqm = NULL; - } - mutex_unlock(&xfs_Gqm_lock); -} - -/* - * Just destroy the quotainfo structure. - */ -void -xfs_qm_unmount( - struct xfs_mount *mp) -{ - if (mp->m_quotainfo) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); - xfs_qm_destroy_quotainfo(mp); - } -} - - -/* - * This is called from xfs_mountfs to start quotas and initialize all - * necessary data structures like quotainfo. This is also responsible for - * running a quotacheck as necessary. We are guaranteed that the superblock - * is consistently read in at this point. - * - * If we fail here, the mount will continue with quota turned off. We don't - * need to inidicate success or failure at all. - */ -void -xfs_qm_mount_quotas( - xfs_mount_t *mp) -{ - int error = 0; - uint sbf; - - /* - * If quotas on realtime volumes is not supported, we disable - * quotas immediately. - */ - if (mp->m_sb.sb_rextents) { - xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); - mp->m_qflags = 0; - goto write_changes; - } - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Allocate the quotainfo structure inside the mount struct, and - * create quotainode(s), and change/rev superblock if necessary. - */ - error = xfs_qm_init_quotainfo(mp); - if (error) { - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo == NULL); - mp->m_qflags = 0; - goto write_changes; - } - /* - * If any of the quotas are not consistent, do a quotacheck. - */ - if (XFS_QM_NEED_QUOTACHECK(mp)) { - error = xfs_qm_quotacheck(mp); - if (error) { - /* Quotacheck failed and disabled quotas. */ - return; - } - } - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - if (!XFS_IS_UQUOTA_ON(mp)) - mp->m_qflags &= ~XFS_UQUOTA_CHKD; - if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) - mp->m_qflags &= ~XFS_OQUOTA_CHKD; - - write_changes: - /* - * We actually don't have to acquire the m_sb_lock at all. - * This can only be called from mount, and that's single threaded. XXX - */ - spin_lock(&mp->m_sb_lock); - sbf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { - if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { - /* - * We could only have been turning quotas off. - * We aren't in very good shape actually because - * the incore structures are convinced that quotas are - * off, but the on disk superblock doesn't know that ! - */ - ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); - xfs_alert(mp, "%s: Superblock update failed!", - __func__); - } - } - - if (error) { - xfs_warn(mp, "Failed to initialize disk quotas."); - return; - } -} - -/* - * Called from the vfsops layer. - */ -void -xfs_qm_unmount_quotas( - xfs_mount_t *mp) -{ - /* - * Release the dquots that root inode, et al might be holding, - * before we flush quotas and blow away the quotainfo structure. - */ - ASSERT(mp->m_rootip); - xfs_qm_dqdetach(mp->m_rootip); - if (mp->m_rbmip) - xfs_qm_dqdetach(mp->m_rbmip); - if (mp->m_rsumip) - xfs_qm_dqdetach(mp->m_rsumip); - - /* - * Release the quota inodes. - */ - if (mp->m_quotainfo) { - if (mp->m_quotainfo->qi_uquotaip) { - IRELE(mp->m_quotainfo->qi_uquotaip); - mp->m_quotainfo->qi_uquotaip = NULL; - } - if (mp->m_quotainfo->qi_gquotaip) { - IRELE(mp->m_quotainfo->qi_gquotaip); - mp->m_quotainfo->qi_gquotaip = NULL; - } - } -} - -/* - * Flush all dquots of the given file system to disk. The dquots are - * _not_ purged from memory here, just their data written to disk. - */ -STATIC int -xfs_qm_dqflush_all( - struct xfs_mount *mp, - int sync_mode) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - int recl; - struct xfs_dquot *dqp; - int error; - - if (!q) - return 0; -again: - mutex_lock(&q->qi_dqlist_lock); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - xfs_dqlock(dqp); - if (! XFS_DQ_IS_DIRTY(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* XXX a sentinel would be better */ - recl = q->qi_dqreclaims; - if (!xfs_dqflock_nowait(dqp)) { - /* - * If we can't grab the flush lock then check - * to see if the dquot has been flushed delayed - * write. If so, grab its buffer and send it - * out immediately. We'll be able to acquire - * the flush lock when the I/O completes. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - /* - * Let go of the mplist lock. We don't want to hold it - * across a disk write. - */ - mutex_unlock(&q->qi_dqlist_lock); - error = xfs_qm_dqflush(dqp, sync_mode); - xfs_dqunlock(dqp); - if (error) - return error; - - mutex_lock(&q->qi_dqlist_lock); - if (recl != q->qi_dqreclaims) { - mutex_unlock(&q->qi_dqlist_lock); - /* XXX restart limit */ - goto again; - } - } - - mutex_unlock(&q->qi_dqlist_lock); - /* return ! busy */ - return 0; -} -/* - * Release the group dquot pointers the user dquots may be - * carrying around as a hint. mplist is locked on entry and exit. - */ -STATIC void -xfs_qm_detach_gdquots( - struct xfs_mount *mp) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_dquot *dqp, *gdqp; - int nrecl; - - again: - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - xfs_dqlock(dqp); - if ((gdqp = dqp->q_gdquot)) { - xfs_dqlock(gdqp); - dqp->q_gdquot = NULL; - } - xfs_dqunlock(dqp); - - if (gdqp) { - /* - * Can't hold the mplist lock across a dqput. - * XXXmust convert to marker based iterations here. - */ - nrecl = q->qi_dqreclaims; - mutex_unlock(&q->qi_dqlist_lock); - xfs_qm_dqput(gdqp); - - mutex_lock(&q->qi_dqlist_lock); - if (nrecl != q->qi_dqreclaims) - goto again; - } - } -} - -/* - * Go through all the incore dquots of this file system and take them - * off the mplist and hashlist, if the dquot type matches the dqtype - * parameter. This is used when turning off quota accounting for - * users and/or groups, as well as when the filesystem is unmounting. - */ -STATIC int -xfs_qm_dqpurge_int( - struct xfs_mount *mp, - uint flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_dquot *dqp, *n; - uint dqtype; - int nrecl; - int nmisses; - - if (!q) - return 0; - - dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; - dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; - dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; - - mutex_lock(&q->qi_dqlist_lock); - - /* - * In the first pass through all incore dquots of this filesystem, - * we release the group dquot pointers the user dquots may be - * carrying around as a hint. We need to do this irrespective of - * what's being turned off. - */ - xfs_qm_detach_gdquots(mp); - - again: - nmisses = 0; - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - /* - * Try to get rid of all of the unwanted dquots. The idea is to - * get them off mplist and hashlist, but leave them on freelist. - */ - list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { - /* - * It's OK to look at the type without taking dqlock here. - * We're holding the mplist lock here, and that's needed for - * a dqreclaim. - */ - if ((dqp->dq_flags & dqtype) == 0) - continue; - - if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - nrecl = q->qi_dqreclaims; - mutex_unlock(&q->qi_dqlist_lock); - mutex_lock(&dqp->q_hash->qh_lock); - mutex_lock(&q->qi_dqlist_lock); - - /* - * XXXTheoretically, we can get into a very long - * ping pong game here. - * No one can be adding dquots to the mplist at - * this point, but somebody might be taking things off. - */ - if (nrecl != q->qi_dqreclaims) { - mutex_unlock(&dqp->q_hash->qh_lock); - goto again; - } - } - - /* - * Take the dquot off the mplist and hashlist. It may remain on - * freelist in INACTIVE state. - */ - nmisses += xfs_qm_dqpurge(dqp); - } - mutex_unlock(&q->qi_dqlist_lock); - return nmisses; -} - -int -xfs_qm_dqpurge_all( - xfs_mount_t *mp, - uint flags) -{ - int ndquots; - - /* - * Purge the dquot cache. - * None of the dquots should really be busy at this point. - */ - if (mp->m_quotainfo) { - while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { - delay(ndquots * 10); - } - } - return 0; -} - -STATIC int -xfs_qm_dqattach_one( - xfs_inode_t *ip, - xfs_dqid_t id, - uint type, - uint doalloc, - xfs_dquot_t *udqhint, /* hint */ - xfs_dquot_t **IO_idqpp) -{ - xfs_dquot_t *dqp; - int error; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - error = 0; - - /* - * See if we already have it in the inode itself. IO_idqpp is - * &i_udquot or &i_gdquot. This made the code look weird, but - * made the logic a lot simpler. - */ - dqp = *IO_idqpp; - if (dqp) { - trace_xfs_dqattach_found(dqp); - return 0; - } - - /* - * udqhint is the i_udquot field in inode, and is non-NULL only - * when the type arg is group/project. Its purpose is to save a - * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside - * the user dquot. - */ - if (udqhint) { - ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - xfs_dqlock(udqhint); - - /* - * No need to take dqlock to look at the id. - * - * The ID can't change until it gets reclaimed, and it won't - * be reclaimed as long as we have a ref from inode and we - * hold the ilock. - */ - dqp = udqhint->q_gdquot; - if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { - xfs_dqlock(dqp); - XFS_DQHOLD(dqp); - ASSERT(*IO_idqpp == NULL); - *IO_idqpp = dqp; - - xfs_dqunlock(dqp); - xfs_dqunlock(udqhint); - return 0; - } - - /* - * We can't hold a dquot lock when we call the dqget code. - * We'll deadlock in no time, because of (not conforming to) - * lock ordering - the inodelock comes before any dquot lock, - * and we may drop and reacquire the ilock in xfs_qm_dqget(). - */ - xfs_dqunlock(udqhint); - } - - /* - * Find the dquot from somewhere. This bumps the - * reference count of dquot and returns it locked. - * This can return ENOENT if dquot didn't exist on - * disk and we didn't ask it to allocate; - * ESRCH if quotas got turned off suddenly. - */ - error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); - if (error) - return error; - - trace_xfs_dqattach_get(dqp); - - /* - * dqget may have dropped and re-acquired the ilock, but it guarantees - * that the dquot returned is the one that should go in the inode. - */ - *IO_idqpp = dqp; - xfs_dqunlock(dqp); - return 0; -} - - -/* - * Given a udquot and gdquot, attach a ptr to the group dquot in the - * udquot as a hint for future lookups. The idea sounds simple, but the - * execution isn't, because the udquot might have a group dquot attached - * already and getting rid of that gets us into lock ordering constraints. - * The process is complicated more by the fact that the dquots may or may not - * be locked on entry. - */ -STATIC void -xfs_qm_dqattach_grouphint( - xfs_dquot_t *udq, - xfs_dquot_t *gdq) -{ - xfs_dquot_t *tmp; - - xfs_dqlock(udq); - - if ((tmp = udq->q_gdquot)) { - if (tmp == gdq) { - xfs_dqunlock(udq); - return; - } - - udq->q_gdquot = NULL; - /* - * We can't keep any dqlocks when calling dqrele, - * because the freelist lock comes before dqlocks. - */ - xfs_dqunlock(udq); - /* - * we took a hard reference once upon a time in dqget, - * so give it back when the udquot no longer points at it - * dqput() does the unlocking of the dquot. - */ - xfs_qm_dqrele(tmp); - - xfs_dqlock(udq); - xfs_dqlock(gdq); - - } else { - ASSERT(XFS_DQ_IS_LOCKED(udq)); - xfs_dqlock(gdq); - } - - ASSERT(XFS_DQ_IS_LOCKED(udq)); - ASSERT(XFS_DQ_IS_LOCKED(gdq)); - /* - * Somebody could have attached a gdquot here, - * when we dropped the uqlock. If so, just do nothing. - */ - if (udq->q_gdquot == NULL) { - XFS_DQHOLD(gdq); - udq->q_gdquot = gdq; - } - - xfs_dqunlock(gdq); - xfs_dqunlock(udq); -} - - -/* - * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON - * into account. - * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. - * Inode may get unlocked and relocked in here, and the caller must deal with - * the consequences. - */ -int -xfs_qm_dqattach_locked( - xfs_inode_t *ip, - uint flags) -{ - xfs_mount_t *mp = ip->i_mount; - uint nquotas = 0; - int error = 0; - - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - !XFS_NOT_DQATTACHED(mp, ip) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) - return 0; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - - if (XFS_IS_UQUOTA_ON(mp)) { - error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, - flags & XFS_QMOPT_DQALLOC, - NULL, &ip->i_udquot); - if (error) - goto done; - nquotas++; - } - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - if (XFS_IS_OQUOTA_ON(mp)) { - error = XFS_IS_GQUOTA_ON(mp) ? - xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, - flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot) : - xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, - flags & XFS_QMOPT_DQALLOC, - ip->i_udquot, &ip->i_gdquot); - /* - * Don't worry about the udquot that we may have - * attached above. It'll get detached, if not already. - */ - if (error) - goto done; - nquotas++; - } - - /* - * Attach this group quota to the user quota as a hint. - * This WON'T, in general, result in a thrash. - */ - if (nquotas == 2) { - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(ip->i_udquot); - ASSERT(ip->i_gdquot); - - /* - * We may or may not have the i_udquot locked at this point, - * but this check is OK since we don't depend on the i_gdquot to - * be accurate 100% all the time. It is just a hint, and this - * will succeed in general. - */ - if (ip->i_udquot->q_gdquot == ip->i_gdquot) - goto done; - /* - * Attach i_gdquot to the gdquot hint inside the i_udquot. - */ - xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); - } - - done: -#ifdef DEBUG - if (!error) { - if (XFS_IS_UQUOTA_ON(mp)) - ASSERT(ip->i_udquot); - if (XFS_IS_OQUOTA_ON(mp)) - ASSERT(ip->i_gdquot); - } - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif - return error; -} - -int -xfs_qm_dqattach( - struct xfs_inode *ip, - uint flags) -{ - int error; - - xfs_ilock(ip, XFS_ILOCK_EXCL); - error = xfs_qm_dqattach_locked(ip, flags); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - - return error; -} - -/* - * Release dquots (and their references) if any. - * The inode should be locked EXCL except when this's called by - * xfs_ireclaim. - */ -void -xfs_qm_dqdetach( - xfs_inode_t *ip) -{ - if (!(ip->i_udquot || ip->i_gdquot)) - return; - - trace_xfs_dquot_dqdetach(ip); - - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); - ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); - if (ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } -} - -int -xfs_qm_sync( - struct xfs_mount *mp, - int flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - int recl, restarts; - struct xfs_dquot *dqp; - int error; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - restarts = 0; - - again: - mutex_lock(&q->qi_dqlist_lock); - /* - * dqpurge_all() also takes the mplist lock and iterate thru all dquots - * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared - * when we have the mplist lock, we know that dquots will be consistent - * as long as we have it locked. - */ - if (!XFS_IS_QUOTA_ON(mp)) { - mutex_unlock(&q->qi_dqlist_lock); - return 0; - } - ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); - list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { - /* - * If this is vfs_sync calling, then skip the dquots that - * don't 'seem' to be dirty. ie. don't acquire dqlock. - * This is very similar to what xfs_sync does with inodes. - */ - if (flags & SYNC_TRYLOCK) { - if (!XFS_DQ_IS_DIRTY(dqp)) - continue; - if (!xfs_qm_dqlock_nowait(dqp)) - continue; - } else { - xfs_dqlock(dqp); - } - - /* - * Now, find out for sure if this dquot is dirty or not. - */ - if (! XFS_DQ_IS_DIRTY(dqp)) { - xfs_dqunlock(dqp); - continue; - } - - /* XXX a sentinel would be better */ - recl = q->qi_dqreclaims; - if (!xfs_dqflock_nowait(dqp)) { - if (flags & SYNC_TRYLOCK) { - xfs_dqunlock(dqp); - continue; - } - /* - * If we can't grab the flush lock then if the caller - * really wanted us to give this our best shot, so - * see if we can give a push to the buffer before we wait - * on the flush lock. At this point, we know that - * even though the dquot is being flushed, - * it has (new) dirty data. - */ - xfs_qm_dqflock_pushbuf_wait(dqp); - } - /* - * Let go of the mplist lock. We don't want to hold it - * across a disk write - */ - mutex_unlock(&q->qi_dqlist_lock); - error = xfs_qm_dqflush(dqp, flags); - xfs_dqunlock(dqp); - if (error && XFS_FORCED_SHUTDOWN(mp)) - return 0; /* Need to prevent umount failure */ - else if (error) - return error; - - mutex_lock(&q->qi_dqlist_lock); - if (recl != q->qi_dqreclaims) { - if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) - break; - - mutex_unlock(&q->qi_dqlist_lock); - goto again; - } - } - - mutex_unlock(&q->qi_dqlist_lock); - return 0; -} - -/* - * The hash chains and the mplist use the same xfs_dqhash structure as - * their list head, but we can take the mplist qh_lock and one of the - * hash qh_locks at the same time without any problem as they aren't - * related. - */ -static struct lock_class_key xfs_quota_mplist_class; - -/* - * This initializes all the quota information that's kept in the - * mount structure - */ -STATIC int -xfs_qm_init_quotainfo( - xfs_mount_t *mp) -{ - xfs_quotainfo_t *qinf; - int error; - xfs_dquot_t *dqp; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * Tell XQM that we exist as soon as possible. - */ - if ((error = xfs_qm_hold_quotafs_ref(mp))) { - return error; - } - - qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); - - /* - * See if quotainodes are setup, and if not, allocate them, - * and change the superblock accordingly. - */ - if ((error = xfs_qm_init_quotainos(mp))) { - kmem_free(qinf); - mp->m_quotainfo = NULL; - return error; - } - - INIT_LIST_HEAD(&qinf->qi_dqlist); - mutex_init(&qinf->qi_dqlist_lock); - lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); - - qinf->qi_dqreclaims = 0; - - /* mutex used to serialize quotaoffs */ - mutex_init(&qinf->qi_quotaofflock); - - /* Precalc some constants */ - qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - ASSERT(qinf->qi_dqchunklen); - qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); - do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); - - mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); - - /* - * We try to get the limits from the superuser's limits fields. - * This is quite hacky, but it is standard quota practice. - * We look at the USR dquot with id == 0 first, but if user quotas - * are not enabled we goto the GRP dquot with id == 0. - * We don't really care to keep separate default limits for user - * and group quotas, at least not at this point. - */ - error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, - XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : - (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : - XFS_DQ_PROJ), - XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, - &dqp); - if (! error) { - xfs_disk_dquot_t *ddqp = &dqp->q_core; - - /* - * The warnings and timers set the grace period given to - * a user or group before he or she can not perform any - * more writing. If it is zero, a default is used. - */ - qinf->qi_btimelimit = ddqp->d_btimer ? - be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; - qinf->qi_itimelimit = ddqp->d_itimer ? - be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; - qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? - be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; - qinf->qi_bwarnlimit = ddqp->d_bwarns ? - be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; - qinf->qi_iwarnlimit = ddqp->d_iwarns ? - be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; - qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? - be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; - qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); - qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); - qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); - qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); - qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); - qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - - /* - * We sent the XFS_QMOPT_DQSUSER flag to dqget because - * we don't want this dquot cached. We haven't done a - * quotacheck yet, and quotacheck doesn't like incore dquots. - */ - xfs_qm_dqdestroy(dqp); - } else { - qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; - qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; - qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; - qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; - qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; - qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; - } - - return 0; -} - - -/* - * Gets called when unmounting a filesystem or when all quotas get - * turned off. - * This purges the quota inodes, destroys locks and frees itself. - */ -void -xfs_qm_destroy_quotainfo( - xfs_mount_t *mp) -{ - xfs_quotainfo_t *qi; - - qi = mp->m_quotainfo; - ASSERT(qi != NULL); - ASSERT(xfs_Gqm != NULL); - - /* - * Release the reference that XQM kept, so that we know - * when the XQM structure should be freed. We cannot assume - * that xfs_Gqm is non-null after this point. - */ - xfs_qm_rele_quotafs_ref(mp); - - ASSERT(list_empty(&qi->qi_dqlist)); - mutex_destroy(&qi->qi_dqlist_lock); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - mutex_destroy(&qi->qi_quotaofflock); - kmem_free(qi); - mp->m_quotainfo = NULL; -} - - - -/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ - -/* ARGSUSED */ -STATIC void -xfs_qm_list_init( - xfs_dqlist_t *list, - char *str, - int n) -{ - mutex_init(&list->qh_lock); - INIT_LIST_HEAD(&list->qh_list); - list->qh_version = 0; - list->qh_nelems = 0; -} - -STATIC void -xfs_qm_list_destroy( - xfs_dqlist_t *list) -{ - mutex_destroy(&(list->qh_lock)); -} - -/* - * Create an inode and return with a reference already taken, but unlocked - * This is how we create quota inodes - */ -STATIC int -xfs_qm_qino_alloc( - xfs_mount_t *mp, - xfs_inode_t **ip, - __int64_t sbfields, - uint flags) -{ - xfs_trans_t *tp; - int error; - int committed; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); - if ((error = xfs_trans_reserve(tp, - XFS_QM_QINOCREATE_SPACE_RES(mp), - XFS_CREATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_CREATE_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return error; - } - - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); - return error; - } - - /* - * Make the changes in the superblock, and log those too. - * sbfields arg may contain fields other than *QUOTINO; - * VERSIONNUM for example. - */ - spin_lock(&mp->m_sb_lock); - if (flags & XFS_QMOPT_SBVERSION) { - ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); - ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == - (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); - - xfs_sb_version_addquota(&mp->m_sb); - mp->m_sb.sb_uquotino = NULLFSINO; - mp->m_sb.sb_gquotino = NULLFSINO; - - /* qflags will get updated _after_ quotacheck */ - mp->m_sb.sb_qflags = 0; - } - if (flags & XFS_QMOPT_UQUOTA) - mp->m_sb.sb_uquotino = (*ip)->i_ino; - else - mp->m_sb.sb_gquotino = (*ip)->i_ino; - spin_unlock(&mp->m_sb_lock); - xfs_mod_sb(tp, sbfields); - - if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { - xfs_alert(mp, "%s failed (error %d)!", __func__, error); - return error; - } - return 0; -} - - -STATIC void -xfs_qm_reset_dqcounts( - xfs_mount_t *mp, - xfs_buf_t *bp, - xfs_dqid_t id, - uint type) -{ - xfs_disk_dquot_t *ddq; - int j; - - trace_xfs_reset_dqcounts(bp, _RET_IP_); - - /* - * Reset all counters and timers. They'll be - * started afresh by xfs_qm_quotacheck. - */ -#ifdef DEBUG - j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); - do_div(j, sizeof(xfs_dqblk_t)); - ASSERT(mp->m_quotainfo->qi_dqperchunk == j); -#endif - ddq = bp->b_addr; - for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { - /* - * Do a sanity check, and if needed, repair the dqblk. Don't - * output any warnings because it's perfectly possible to - * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. - */ - (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, - "xfs_quotacheck"); - ddq->d_bcount = 0; - ddq->d_icount = 0; - ddq->d_rtbcount = 0; - ddq->d_btimer = 0; - ddq->d_itimer = 0; - ddq->d_rtbtimer = 0; - ddq->d_bwarns = 0; - ddq->d_iwarns = 0; - ddq->d_rtbwarns = 0; - ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); - } -} - -STATIC int -xfs_qm_dqiter_bufs( - xfs_mount_t *mp, - xfs_dqid_t firstid, - xfs_fsblock_t bno, - xfs_filblks_t blkcnt, - uint flags) -{ - xfs_buf_t *bp; - int error; - int type; - - ASSERT(blkcnt > 0); - type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : - (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); - error = 0; - - /* - * Blkcnt arg can be a very big number, and might even be - * larger than the log itself. So, we have to break it up into - * manageable-sized transactions. - * Note that we don't start a permanent transaction here; we might - * not be able to get a log reservation for the whole thing up front, - * and we don't really care to either, because we just discard - * everything if we were to crash in the middle of this loop. - */ - while (blkcnt--) { - error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, bno), - mp->m_quotainfo->qi_dqchunklen, 0, &bp); - if (error) - break; - - xfs_qm_reset_dqcounts(mp, bp, firstid, type); - xfs_bdwrite(mp, bp); - /* - * goto the next block. - */ - bno++; - firstid += mp->m_quotainfo->qi_dqperchunk; - } - return error; -} - -/* - * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a - * caller supplied function for every chunk of dquots that we find. - */ -STATIC int -xfs_qm_dqiterate( - xfs_mount_t *mp, - xfs_inode_t *qip, - uint flags) -{ - xfs_bmbt_irec_t *map; - int i, nmaps; /* number of map entries */ - int error; /* return value */ - xfs_fileoff_t lblkno; - xfs_filblks_t maxlblkcnt; - xfs_dqid_t firstid; - xfs_fsblock_t rablkno; - xfs_filblks_t rablkcnt; - - error = 0; - /* - * This looks racy, but we can't keep an inode lock across a - * trans_reserve. But, this gets called during quotacheck, and that - * happens only at mount time which is single threaded. - */ - if (qip->i_d.di_nblocks == 0) - return 0; - - map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); - - lblkno = 0; - maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); - do { - nmaps = XFS_DQITER_MAP_SIZE; - /* - * We aren't changing the inode itself. Just changing - * some of its data. No new blocks are added here, and - * the inode is never added to the transaction. - */ - xfs_ilock(qip, XFS_ILOCK_SHARED); - error = xfs_bmapi(NULL, qip, lblkno, - maxlblkcnt - lblkno, - XFS_BMAPI_METADATA, - NULL, - 0, map, &nmaps, NULL); - xfs_iunlock(qip, XFS_ILOCK_SHARED); - if (error) - break; - - ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); - for (i = 0; i < nmaps; i++) { - ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); - ASSERT(map[i].br_blockcount); - - - lblkno += map[i].br_blockcount; - - if (map[i].br_startblock == HOLESTARTBLOCK) - continue; - - firstid = (xfs_dqid_t) map[i].br_startoff * - mp->m_quotainfo->qi_dqperchunk; - /* - * Do a read-ahead on the next extent. - */ - if ((i+1 < nmaps) && - (map[i+1].br_startblock != HOLESTARTBLOCK)) { - rablkcnt = map[i+1].br_blockcount; - rablkno = map[i+1].br_startblock; - while (rablkcnt--) { - xfs_buf_readahead(mp->m_ddev_targp, - XFS_FSB_TO_DADDR(mp, rablkno), - mp->m_quotainfo->qi_dqchunklen); - rablkno++; - } - } - /* - * Iterate thru all the blks in the extent and - * reset the counters of all the dquots inside them. - */ - if ((error = xfs_qm_dqiter_bufs(mp, - firstid, - map[i].br_startblock, - map[i].br_blockcount, - flags))) { - break; - } - } - - if (error) - break; - } while (nmaps > 0); - - kmem_free(map); - - return error; -} - -/* - * Called by dqusage_adjust in doing a quotacheck. - * - * Given the inode, and a dquot id this updates both the incore dqout as well - * as the buffer copy. This is so that once the quotacheck is done, we can - * just log all the buffers, as opposed to logging numerous updates to - * individual dquots. - */ -STATIC int -xfs_qm_quotacheck_dqadjust( - struct xfs_inode *ip, - xfs_dqid_t id, - uint type, - xfs_qcnt_t nblks, - xfs_qcnt_t rtblks) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *dqp; - int error; - - error = xfs_qm_dqget(mp, ip, id, type, - XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); - if (error) { - /* - * Shouldn't be able to turn off quotas here. - */ - ASSERT(error != ESRCH); - ASSERT(error != ENOENT); - return error; - } - - trace_xfs_dqadjust(dqp); - - /* - * Adjust the inode count and the block count to reflect this inode's - * resource usage. - */ - be64_add_cpu(&dqp->q_core.d_icount, 1); - dqp->q_res_icount++; - if (nblks) { - be64_add_cpu(&dqp->q_core.d_bcount, nblks); - dqp->q_res_bcount += nblks; - } - if (rtblks) { - be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); - dqp->q_res_rtbcount += rtblks; - } - - /* - * Set default limits, adjust timers (since we changed usages) - * - * There are no timers for the default values set in the root dquot. - */ - if (dqp->q_core.d_id) { - xfs_qm_adjust_dqlimits(mp, &dqp->q_core); - xfs_qm_adjust_dqtimers(mp, &dqp->q_core); - } - - dqp->dq_flags |= XFS_DQ_DIRTY; - xfs_qm_dqput(dqp); - return 0; -} - -STATIC int -xfs_qm_get_rtblks( - xfs_inode_t *ip, - xfs_qcnt_t *O_rtblks) -{ - xfs_filblks_t rtblks; /* total rt blks */ - xfs_extnum_t idx; /* extent record index */ - xfs_ifork_t *ifp; /* inode fork pointer */ - xfs_extnum_t nextents; /* number of extent entries */ - int error; - - ASSERT(XFS_IS_REALTIME_INODE(ip)); - ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); - if (!(ifp->if_flags & XFS_IFEXTENTS)) { - if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) - return error; - } - rtblks = 0; - nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); - for (idx = 0; idx < nextents; idx++) - rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); - *O_rtblks = (xfs_qcnt_t)rtblks; - return 0; -} - -/* - * callback routine supplied to bulkstat(). Given an inumber, find its - * dquots and update them to account for resources taken by that inode. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_dqusage_adjust( - xfs_mount_t *mp, /* mount point for filesystem */ - xfs_ino_t ino, /* inode number to get data for */ - void __user *buffer, /* not used */ - int ubsize, /* not used */ - int *ubused, /* not used */ - int *res) /* result code value */ -{ - xfs_inode_t *ip; - xfs_qcnt_t nblks, rtblks = 0; - int error; - - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * rootino must have its resources accounted for, not so with the quota - * inodes. - */ - if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { - *res = BULKSTAT_RV_NOTHING; - return XFS_ERROR(EINVAL); - } - - /* - * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget - * interface expects the inode to be exclusively locked because that's - * the case in all other instances. It's OK that we do this because - * quotacheck is done only at mount time. - */ - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); - if (error) { - *res = BULKSTAT_RV_NOTHING; - return error; - } - - ASSERT(ip->i_delayed_blks == 0); - - if (XFS_IS_REALTIME_INODE(ip)) { - /* - * Walk thru the extent list and count the realtime blocks. - */ - error = xfs_qm_get_rtblks(ip, &rtblks); - if (error) - goto error0; - } - - nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; - - /* - * Add the (disk blocks and inode) resources occupied by this - * inode to its dquots. We do this adjustment in the incore dquot, - * and also copy the changes to its buffer. - * We don't care about putting these changes in a transaction - * envelope because if we crash in the middle of a 'quotacheck' - * we have to start from the beginning anyway. - * Once we're done, we'll log all the dquot bufs. - * - * The *QUOTA_ON checks below may look pretty racy, but quotachecks - * and quotaoffs don't race. (Quotachecks happen at mount time only). - */ - if (XFS_IS_UQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, - XFS_DQ_USER, nblks, rtblks); - if (error) - goto error0; - } - - if (XFS_IS_GQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, - XFS_DQ_GROUP, nblks, rtblks); - if (error) - goto error0; - } - - if (XFS_IS_PQUOTA_ON(mp)) { - error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), - XFS_DQ_PROJ, nblks, rtblks); - if (error) - goto error0; - } - - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - *res = BULKSTAT_RV_DIDONE; - return 0; - -error0: - xfs_iunlock(ip, XFS_ILOCK_EXCL); - IRELE(ip); - *res = BULKSTAT_RV_GIVEUP; - return error; -} - -/* - * Walk thru all the filesystem inodes and construct a consistent view - * of the disk quota world. If the quotacheck fails, disable quotas. - */ -int -xfs_qm_quotacheck( - xfs_mount_t *mp) -{ - int done, count, error; - xfs_ino_t lastino; - size_t structsz; - xfs_inode_t *uip, *gip; - uint flags; - - count = INT_MAX; - structsz = 1; - lastino = 0; - flags = 0; - - ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - /* - * There should be no cached dquots. The (simplistic) quotacheck - * algorithm doesn't like that. - */ - ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); - - xfs_notice(mp, "Quotacheck needed: Please wait."); - - /* - * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset - * their counters to zero. We need a clean slate. - * We don't log our changes till later. - */ - uip = mp->m_quotainfo->qi_uquotaip; - if (uip) { - error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); - if (error) - goto error_return; - flags |= XFS_UQUOTA_CHKD; - } - - gip = mp->m_quotainfo->qi_gquotaip; - if (gip) { - error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); - if (error) - goto error_return; - flags |= XFS_OQUOTA_CHKD; - } - - do { - /* - * Iterate thru all the inodes in the file system, - * adjusting the corresponding dquot counters in core. - */ - error = xfs_bulkstat(mp, &lastino, &count, - xfs_qm_dqusage_adjust, - structsz, NULL, &done); - if (error) - break; - - } while (!done); - - /* - * We've made all the changes that we need to make incore. - * Flush them down to disk buffers if everything was updated - * successfully. - */ - if (!error) - error = xfs_qm_dqflush_all(mp, 0); - - /* - * We can get this error if we couldn't do a dquot allocation inside - * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the - * dirty dquots that might be cached, we just want to get rid of them - * and turn quotaoff. The dquots won't be attached to any of the inodes - * at this point (because we intentionally didn't in dqget_noattach). - */ - if (error) { - xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); - goto error_return; - } - - /* - * We didn't log anything, because if we crashed, we'll have to - * start the quotacheck from scratch anyway. However, we must make - * sure that our dquot changes are secure before we put the - * quotacheck'd stamp on the superblock. So, here we do a synchronous - * flush. - */ - XFS_bflush(mp->m_ddev_targp); - - /* - * If one type of quotas is off, then it will lose its - * quotachecked status, since we won't be doing accounting for - * that type anymore. - */ - mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); - mp->m_qflags |= flags; - - error_return: - if (error) { - xfs_warn(mp, - "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", - error); - /* - * We must turn off quotas. - */ - ASSERT(mp->m_quotainfo != NULL); - ASSERT(xfs_Gqm != NULL); - xfs_qm_destroy_quotainfo(mp); - if (xfs_mount_reset_sbqflags(mp)) { - xfs_warn(mp, - "Quotacheck: Failed to reset quota flags."); - } - } else - xfs_notice(mp, "Quotacheck: Done."); - return (error); -} - -/* - * This is called after the superblock has been read in and we're ready to - * iget the quota inodes. - */ -STATIC int -xfs_qm_init_quotainos( - xfs_mount_t *mp) -{ - xfs_inode_t *uip, *gip; - int error; - __int64_t sbflags; - uint flags; - - ASSERT(mp->m_quotainfo); - uip = gip = NULL; - sbflags = 0; - flags = 0; - - /* - * Get the uquota and gquota inodes - */ - if (xfs_sb_version_hasquota(&mp->m_sb)) { - if (XFS_IS_UQUOTA_ON(mp) && - mp->m_sb.sb_uquotino != NULLFSINO) { - ASSERT(mp->m_sb.sb_uquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip))) - return XFS_ERROR(error); - } - if (XFS_IS_OQUOTA_ON(mp) && - mp->m_sb.sb_gquotino != NULLFSINO) { - ASSERT(mp->m_sb.sb_gquotino > 0); - if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip))) { - if (uip) - IRELE(uip); - return XFS_ERROR(error); - } - } - } else { - flags |= XFS_QMOPT_SBVERSION; - sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | - XFS_SB_GQUOTINO | XFS_SB_QFLAGS); - } - - /* - * Create the two inodes, if they don't exist already. The changes - * made above will get added to a transaction and logged in one of - * the qino_alloc calls below. If the device is readonly, - * temporarily switch to read-write to do this. - */ - if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { - if ((error = xfs_qm_qino_alloc(mp, &uip, - sbflags | XFS_SB_UQUOTINO, - flags | XFS_QMOPT_UQUOTA))) - return XFS_ERROR(error); - - flags &= ~XFS_QMOPT_SBVERSION; - } - if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { - flags |= (XFS_IS_GQUOTA_ON(mp) ? - XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); - error = xfs_qm_qino_alloc(mp, &gip, - sbflags | XFS_SB_GQUOTINO, flags); - if (error) { - if (uip) - IRELE(uip); - - return XFS_ERROR(error); - } - } - - mp->m_quotainfo->qi_uquotaip = uip; - mp->m_quotainfo->qi_gquotaip = gip; - - return 0; -} - - - -/* - * Just pop the least recently used dquot off the freelist and - * recycle it. The returned dquot is locked. - */ -STATIC xfs_dquot_t * -xfs_qm_dqreclaim_one(void) -{ - xfs_dquot_t *dqpout; - xfs_dquot_t *dqp; - int restarts; - int startagain; - - restarts = 0; - dqpout = NULL; - - /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ -again: - startagain = 0; - mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); - - list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { - struct xfs_mount *mp = dqp->q_mount; - xfs_dqlock(dqp); - - /* - * We are racing with dqlookup here. Naturally we don't - * want to reclaim a dquot that lookup wants. We release the - * freelist lock and start over, so that lookup will grab - * both the dquot and the freelistlock. - */ - if (dqp->dq_flags & XFS_DQ_WANT) { - ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); - - trace_xfs_dqreclaim_want(dqp); - XQM_STATS_INC(xqmstats.xs_qm_dqwants); - restarts++; - startagain = 1; - goto dqunlock; - } - - /* - * If the dquot is inactive, we are assured that it is - * not on the mplist or the hashlist, and that makes our - * life easier. - */ - if (dqp->dq_flags & XFS_DQ_INACTIVE) { - ASSERT(mp == NULL); - ASSERT(! XFS_DQ_IS_DIRTY(dqp)); - ASSERT(list_empty(&dqp->q_hashlist)); - ASSERT(list_empty(&dqp->q_mplist)); - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - dqpout = dqp; - XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); - goto dqunlock; - } - - ASSERT(dqp->q_hash); - ASSERT(!list_empty(&dqp->q_mplist)); - - /* - * Try to grab the flush lock. If this dquot is in the process - * of getting flushed to disk, we don't want to reclaim it. - */ - if (!xfs_dqflock_nowait(dqp)) - goto dqunlock; - - /* - * We have the flush lock so we know that this is not in the - * process of being flushed. So, if this is dirty, flush it - * DELWRI so that we don't get a freelist infested with - * dirty dquots. - */ - if (XFS_DQ_IS_DIRTY(dqp)) { - int error; - - trace_xfs_dqreclaim_dirty(dqp); - - /* - * We flush it delayed write, so don't bother - * releasing the freelist lock. - */ - error = xfs_qm_dqflush(dqp, 0); - if (error) { - xfs_warn(mp, "%s: dquot %p flush failed", - __func__, dqp); - } - goto dqunlock; - } - - /* - * We're trying to get the hashlock out of order. This races - * with dqlookup; so, we giveup and goto the next dquot if - * we couldn't get the hashlock. This way, we won't starve - * a dqlookup process that holds the hashlock that is - * waiting for the freelist lock. - */ - if (!mutex_trylock(&dqp->q_hash->qh_lock)) { - restarts++; - goto dqfunlock; - } - - /* - * This races with dquot allocation code as well as dqflush_all - * and reclaim code. So, if we failed to grab the mplist lock, - * giveup everything and start over. - */ - if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { - restarts++; - startagain = 1; - goto qhunlock; - } - - ASSERT(dqp->q_nrefs == 0); - list_del_init(&dqp->q_mplist); - mp->m_quotainfo->qi_dquots--; - mp->m_quotainfo->qi_dqreclaims++; - list_del_init(&dqp->q_hashlist); - dqp->q_hash->qh_version++; - list_del_init(&dqp->q_freelist); - xfs_Gqm->qm_dqfrlist_cnt--; - dqpout = dqp; - mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); -qhunlock: - mutex_unlock(&dqp->q_hash->qh_lock); -dqfunlock: - xfs_dqfunlock(dqp); -dqunlock: - xfs_dqunlock(dqp); - if (dqpout) - break; - if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) - break; - if (startagain) { - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - goto again; - } - } - mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); - return dqpout; -} - -/* - * Traverse the freelist of dquots and attempt to reclaim a maximum of - * 'howmany' dquots. This operation races with dqlookup(), and attempts to - * favor the lookup function ... - */ -STATIC int -xfs_qm_shake_freelist( - int howmany) -{ - int nreclaimed = 0; - xfs_dquot_t *dqp; - - if (howmany <= 0) - return 0; - - while (nreclaimed < howmany) { - dqp = xfs_qm_dqreclaim_one(); - if (!dqp) - return nreclaimed; - xfs_qm_dqdestroy(dqp); - nreclaimed++; - } - return nreclaimed; -} - -/* - * The kmem_shake interface is invoked when memory is running low. - */ -/* ARGSUSED */ -STATIC int -xfs_qm_shake( - struct shrinker *shrink, - struct shrink_control *sc) -{ - int ndqused, nfree, n; - gfp_t gfp_mask = sc->gfp_mask; - - if (!kmem_shake_allow(gfp_mask)) - return 0; - if (!xfs_Gqm) - return 0; - - nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ - /* incore dquots in all f/s's */ - ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; - - ASSERT(ndqused >= 0); - - if (nfree <= ndqused && nfree < ndquot) - return 0; - - ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ - n = nfree - ndqused - ndquot; /* # over target */ - - return xfs_qm_shake_freelist(MAX(nfree, n)); -} - - -/*------------------------------------------------------------------*/ - -/* - * Return a new incore dquot. Depending on the number of - * dquots in the system, we either allocate a new one on the kernel heap, - * or reclaim a free one. - * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed - * to reclaim an existing one from the freelist. - */ -boolean_t -xfs_qm_dqalloc_incore( - xfs_dquot_t **O_dqpp) -{ - xfs_dquot_t *dqp; - - /* - * Check against high water mark to see if we want to pop - * a nincompoop dquot off the freelist. - */ - if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { - /* - * Try to recycle a dquot from the freelist. - */ - if ((dqp = xfs_qm_dqreclaim_one())) { - XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); - /* - * Just zero the core here. The rest will get - * reinitialized by caller. XXX we shouldn't even - * do this zero ... - */ - memset(&dqp->q_core, 0, sizeof(dqp->q_core)); - *O_dqpp = dqp; - return B_FALSE; - } - XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); - } - - /* - * Allocate a brand new dquot on the kernel heap and return it - * to the caller to initialize. - */ - ASSERT(xfs_Gqm->qm_dqzone != NULL); - *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); - atomic_inc(&xfs_Gqm->qm_totaldquots); - - return B_TRUE; -} - - -/* - * Start a transaction and write the incore superblock changes to - * disk. flags parameter indicates which fields have changed. - */ -int -xfs_qm_write_sb_changes( - xfs_mount_t *mp, - __int64_t flags) -{ - xfs_trans_t *tp; - int error; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); - if ((error = xfs_trans_reserve(tp, 0, - mp->m_sb.sb_sectsize + 128, 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return error; - } - - xfs_mod_sb(tp, flags); - error = xfs_trans_commit(tp, 0); - - return error; -} - - -/* --------------- utility functions for vnodeops ---------------- */ - - -/* - * Given an inode, a uid, gid and prid make sure that we have - * allocated relevant dquot(s) on disk, and that we won't exceed inode - * quotas by creating this file. - * This also attaches dquot(s) to the given inode after locking it, - * and returns the dquots corresponding to the uid and/or gid. - * - * in : inode (unlocked) - * out : udquot, gdquot with references taken and unlocked - */ -int -xfs_qm_vop_dqalloc( - struct xfs_inode *ip, - uid_t uid, - gid_t gid, - prid_t prid, - uint flags, - struct xfs_dquot **O_udqpp, - struct xfs_dquot **O_gdqpp) -{ - struct xfs_mount *mp = ip->i_mount; - struct xfs_dquot *uq, *gq; - int error; - uint lockflags; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - lockflags = XFS_ILOCK_EXCL; - xfs_ilock(ip, lockflags); - - if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) - gid = ip->i_d.di_gid; - - /* - * Attach the dquot(s) to this inode, doing a dquot allocation - * if necessary. The dquot(s) will not be locked. - */ - if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); - if (error) { - xfs_iunlock(ip, lockflags); - return error; - } - } - - uq = gq = NULL; - if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { - if (ip->i_d.di_uid != uid) { - /* - * What we need is the dquot that has this uid, and - * if we send the inode to dqget, the uid of the inode - * takes priority over what's sent in the uid argument. - * We must unlock inode here before calling dqget if - * we're not sending the inode, because otherwise - * we'll deadlock by doing trans_reserve while - * holding ilock. - */ - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, - XFS_DQ_USER, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &uq))) { - ASSERT(error != ENOENT); - return error; - } - /* - * Get the ilock in the right order. - */ - xfs_dqunlock(uq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - /* - * Take an extra reference, because we'll return - * this to caller - */ - ASSERT(ip->i_udquot); - uq = ip->i_udquot; - xfs_dqlock(uq); - XFS_DQHOLD(uq); - xfs_dqunlock(uq); - } - } - if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { - if (ip->i_d.di_gid != gid) { - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, - XFS_DQ_GROUP, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); - ASSERT(error != ENOENT); - return error; - } - xfs_dqunlock(gq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - ASSERT(ip->i_gdquot); - gq = ip->i_gdquot; - xfs_dqlock(gq); - XFS_DQHOLD(gq); - xfs_dqunlock(gq); - } - } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { - if (xfs_get_projid(ip) != prid) { - xfs_iunlock(ip, lockflags); - if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, - XFS_DQ_PROJ, - XFS_QMOPT_DQALLOC | - XFS_QMOPT_DOWARN, - &gq))) { - if (uq) - xfs_qm_dqrele(uq); - ASSERT(error != ENOENT); - return (error); - } - xfs_dqunlock(gq); - lockflags = XFS_ILOCK_SHARED; - xfs_ilock(ip, lockflags); - } else { - ASSERT(ip->i_gdquot); - gq = ip->i_gdquot; - xfs_dqlock(gq); - XFS_DQHOLD(gq); - xfs_dqunlock(gq); - } - } - if (uq) - trace_xfs_dquot_dqalloc(ip); - - xfs_iunlock(ip, lockflags); - if (O_udqpp) - *O_udqpp = uq; - else if (uq) - xfs_qm_dqrele(uq); - if (O_gdqpp) - *O_gdqpp = gq; - else if (gq) - xfs_qm_dqrele(gq); - return 0; -} - -/* - * Actually transfer ownership, and do dquot modifications. - * These were already reserved. - */ -xfs_dquot_t * -xfs_qm_vop_chown( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t **IO_olddq, - xfs_dquot_t *newdq) -{ - xfs_dquot_t *prevdq; - uint bfield = XFS_IS_REALTIME_INODE(ip) ? - XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; - - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); - - /* old dquot */ - prevdq = *IO_olddq; - ASSERT(prevdq); - ASSERT(prevdq != newdq); - - xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); - xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); - - /* the sparkling new dquot */ - xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); - xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); - - /* - * Take an extra reference, because the inode - * is going to keep this dquot pointer even - * after the trans_commit. - */ - xfs_dqlock(newdq); - XFS_DQHOLD(newdq); - xfs_dqunlock(newdq); - *IO_olddq = newdq; - - return prevdq; -} - -/* - * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). - */ -int -xfs_qm_vop_chown_reserve( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - uint flags) -{ - xfs_mount_t *mp = ip->i_mount; - uint delblks, blkflags, prjflags = 0; - xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; - int error; - - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - delblks = ip->i_delayed_blks; - delblksudq = delblksgdq = unresudq = unresgdq = NULL; - blkflags = XFS_IS_REALTIME_INODE(ip) ? - XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; - - if (XFS_IS_UQUOTA_ON(mp) && udqp && - ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { - delblksudq = udqp; - /* - * If there are delayed allocation blocks, then we have to - * unreserve those from the old dquot, and add them to the - * new dquot. - */ - if (delblks) { - ASSERT(ip->i_udquot); - unresudq = ip->i_udquot; - } - } - if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { - if (XFS_IS_PQUOTA_ON(ip->i_mount) && - xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) - prjflags = XFS_QMOPT_ENOSPC; - - if (prjflags || - (XFS_IS_GQUOTA_ON(ip->i_mount) && - ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { - delblksgdq = gdqp; - if (delblks) { - ASSERT(ip->i_gdquot); - unresgdq = ip->i_gdquot; - } - } - } - - if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, - delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, - flags | blkflags | prjflags))) - return (error); - - /* - * Do the delayed blks reservations/unreservations now. Since, these - * are done without the help of a transaction, if a reservation fails - * its previous reservations won't be automatically undone by trans - * code. So, we have to do it manually here. - */ - if (delblks) { - /* - * Do the reservations first. Unreservation can't fail. - */ - ASSERT(delblksudq || delblksgdq); - ASSERT(unresudq || unresgdq); - if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, - flags | blkflags | prjflags))) - return (error); - xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, - unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, - blkflags); - } - - return (0); -} - -int -xfs_qm_vop_rename_dqattach( - struct xfs_inode **i_tab) -{ - struct xfs_mount *mp = i_tab[0]->i_mount; - int i; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - for (i = 0; (i < 4 && i_tab[i]); i++) { - struct xfs_inode *ip = i_tab[i]; - int error; - - /* - * Watch out for duplicate entries in the table. - */ - if (i == 0 || ip != i_tab[i-1]) { - if (XFS_NOT_DQATTACHED(mp, ip)) { - error = xfs_qm_dqattach(ip, 0); - if (error) - return error; - } - } - } - return 0; -} - -void -xfs_qm_vop_create_dqattach( - struct xfs_trans *tp, - struct xfs_inode *ip, - struct xfs_dquot *udqp, - struct xfs_dquot *gdqp) -{ - struct xfs_mount *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return; - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT(XFS_IS_QUOTA_RUNNING(mp)); - - if (udqp) { - xfs_dqlock(udqp); - XFS_DQHOLD(udqp); - xfs_dqunlock(udqp); - ASSERT(ip->i_udquot == NULL); - ip->i_udquot = udqp; - ASSERT(XFS_IS_UQUOTA_ON(mp)); - ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); - xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); - } - if (gdqp) { - xfs_dqlock(gdqp); - XFS_DQHOLD(gdqp); - xfs_dqunlock(gdqp); - ASSERT(ip->i_gdquot == NULL); - ip->i_gdquot = gdqp; - ASSERT(XFS_IS_OQUOTA_ON(mp)); - ASSERT((XFS_IS_GQUOTA_ON(mp) ? - ip->i_d.di_gid : xfs_get_projid(ip)) == - be32_to_cpu(gdqp->q_core.d_id)); - xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); - } -} - diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h deleted file mode 100644 index 43b9abe1052c..000000000000 --- a/fs/xfs/quota/xfs_qm.h +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QM_H__ -#define __XFS_QM_H__ - -#include "xfs_dquot_item.h" -#include "xfs_dquot.h" -#include "xfs_quota_priv.h" -#include "xfs_qm_stats.h" - -struct xfs_qm; -struct xfs_inode; - -extern uint ndquot; -extern struct mutex xfs_Gqm_lock; -extern struct xfs_qm *xfs_Gqm; -extern kmem_zone_t *qm_dqzone; -extern kmem_zone_t *qm_dqtrxzone; - -/* - * Used in xfs_qm_sync called by xfs_sync to count the max times that it can - * iterate over the mountpt's dquot list in one call. - */ -#define XFS_QM_SYNC_MAX_RESTARTS 7 - -/* - * Ditto, for xfs_qm_dqreclaim_one. - */ -#define XFS_QM_RECLAIM_MAX_RESTARTS 4 - -/* - * Ideal ratio of free to in use dquots. Quota manager makes an attempt - * to keep this balance. - */ -#define XFS_QM_DQFREE_RATIO 2 - -/* - * Dquot hashtable constants/threshold values. - */ -#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) -#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) - -/* - * This defines the unit of allocation of dquots. - * Currently, it is just one file system block, and a 4K blk contains 30 - * (136 * 30 = 4080) dquots. It's probably not worth trying to make - * this more dynamic. - * XXXsup However, if this number is changed, we have to make sure that we don't - * implicitly assume that we do allocations in chunks of a single filesystem - * block in the dquot/xqm code. - */ -#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 - -typedef xfs_dqhash_t xfs_dqlist_t; - -/* - * Quota Manager (global) structure. Lives only in core. - */ -typedef struct xfs_qm { - xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ - xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ - uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ - struct list_head qm_dqfrlist; /* freelist of dquots */ - struct mutex qm_dqfrlist_lock; - int qm_dqfrlist_cnt; - atomic_t qm_totaldquots; /* total incore dquots */ - uint qm_nrefs; /* file systems with quota on */ - int qm_dqfree_ratio;/* ratio of free to inuse dquots */ - kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ - kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ -} xfs_qm_t; - -/* - * Various quota information for individual filesystems. - * The mount structure keeps a pointer to this. - */ -typedef struct xfs_quotainfo { - xfs_inode_t *qi_uquotaip; /* user quota inode */ - xfs_inode_t *qi_gquotaip; /* group quota inode */ - struct list_head qi_dqlist; /* all dquots in filesys */ - struct mutex qi_dqlist_lock; - int qi_dquots; - int qi_dqreclaims; /* a change here indicates - a removal in the dqlist */ - time_t qi_btimelimit; /* limit for blks timer */ - time_t qi_itimelimit; /* limit for inodes timer */ - time_t qi_rtbtimelimit;/* limit for rt blks timer */ - xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ - xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ - xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ - struct mutex qi_quotaofflock;/* to serialize quotaoff */ - xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ - uint qi_dqperchunk; /* # ondisk dqs in above chunk */ - xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ - xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ - xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ - xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ - xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ - xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ -} xfs_quotainfo_t; - - -extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); -extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, - xfs_dquot_t *, xfs_dquot_t *, long, long, uint); -extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); -extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); - -/* - * We keep the usr and grp dquots separately so that locking will be easier - * to do at commit time. All transactions that we know of at this point - * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. - */ -#define XFS_QM_TRANS_MAXDQS 2 -typedef struct xfs_dquot_acct { - xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; - xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; -} xfs_dquot_acct_t; - -/* - * Users are allowed to have a usage exceeding their softlimit for - * a period this long. - */ -#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ -#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ -#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ - -#define XFS_QM_BWARNLIMIT 5 -#define XFS_QM_IWARNLIMIT 5 -#define XFS_QM_RTBWARNLIMIT 5 - -extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern int xfs_qm_quotacheck(xfs_mount_t *); -extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); - -/* dquot stuff */ -extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); -extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); -extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); - -/* quota ops */ -extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); -extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, - fs_disk_quota_t *); -extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, - fs_disk_quota_t *); -extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); -extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); -extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); - -#endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c deleted file mode 100644 index a0a829addca9..000000000000 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_qm.h" - - -STATIC void -xfs_fill_statvfs_from_dquot( - struct kstatfs *statp, - xfs_disk_dquot_t *dp) -{ - __uint64_t limit; - - limit = dp->d_blk_softlimit ? - be64_to_cpu(dp->d_blk_softlimit) : - be64_to_cpu(dp->d_blk_hardlimit); - if (limit && statp->f_blocks > limit) { - statp->f_blocks = limit; - statp->f_bfree = statp->f_bavail = - (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? - (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; - } - - limit = dp->d_ino_softlimit ? - be64_to_cpu(dp->d_ino_softlimit) : - be64_to_cpu(dp->d_ino_hardlimit); - if (limit && statp->f_files > limit) { - statp->f_files = limit; - statp->f_ffree = - (statp->f_files > be64_to_cpu(dp->d_icount)) ? - (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; - } -} - - -/* - * Directory tree accounting is implemented using project quotas, where - * the project identifier is inherited from parent directories. - * A statvfs (df, etc.) of a directory that is using project quota should - * return a statvfs of the project, not the entire filesystem. - * This makes such trees appear as if they are filesystems in themselves. - */ -void -xfs_qm_statvfs( - xfs_inode_t *ip, - struct kstatfs *statp) -{ - xfs_mount_t *mp = ip->i_mount; - xfs_dquot_t *dqp; - - if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { - xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); - xfs_qm_dqput(dqp); - } -} - -int -xfs_qm_newmount( - xfs_mount_t *mp, - uint *needquotamount, - uint *quotaflags) -{ - uint quotaondisk; - uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; - - quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && - (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); - - if (quotaondisk) { - uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; - pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; - gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; - } - - /* - * If the device itself is read-only, we can't allow - * the user to change the state of quota on the mount - - * this would generate a transaction on the ro device, - * which would lead to an I/O error and shutdown - */ - - if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || - (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || - (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || - (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || - (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || - (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && - xfs_dev_is_read_only(mp, "changing quota state")) { - xfs_warn(mp, "please mount with%s%s%s%s.", - (!quotaondisk ? "out quota" : ""), - (uquotaondisk ? " usrquota" : ""), - (pquotaondisk ? " prjquota" : ""), - (gquotaondisk ? " grpquota" : "")); - return XFS_ERROR(EPERM); - } - - if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { - /* - * Call mount_quotas at this point only if we won't have to do - * a quotacheck. - */ - if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { - /* - * If an error occurred, qm_mount_quotas code - * has already disabled quotas. So, just finish - * mounting, and get on with the boring life - * without disk quotas. - */ - xfs_qm_mount_quotas(mp); - } else { - /* - * Clear the quota flags, but remember them. This - * is so that the quota code doesn't get invoked - * before we're ready. This can happen when an - * inode goes inactive and wants to free blocks, - * or via xfs_log_mount_finish. - */ - *needquotamount = B_TRUE; - *quotaflags = mp->m_qflags; - mp->m_qflags = 0; - } - } - - return 0; -} - -void __init -xfs_qm_init(void) -{ - printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); - mutex_init(&xfs_Gqm_lock); - xfs_qm_init_procfs(); -} - -void __exit -xfs_qm_exit(void) -{ - xfs_qm_cleanup_procfs(); - if (qm_dqzone) - kmem_zone_destroy(qm_dqzone); - if (qm_dqtrxzone) - kmem_zone_destroy(qm_dqtrxzone); -} diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c deleted file mode 100644 index 8671a0b32644..000000000000 --- a/fs/xfs/quota/xfs_qm_stats.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_qm.h" - -struct xqmstats xqmstats; - -static int xqm_proc_show(struct seq_file *m, void *v) -{ - /* maximum; incore; ratio free to inuse; freelist */ - seq_printf(m, "%d\t%d\t%d\t%u\n", - ndquot, - xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, - xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, - xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); - return 0; -} - -static int xqm_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqm_proc_show, NULL); -} - -static const struct file_operations xqm_proc_fops = { - .owner = THIS_MODULE, - .open = xqm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int xqmstat_proc_show(struct seq_file *m, void *v) -{ - /* quota performance statistics */ - seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", - xqmstats.xs_qm_dqreclaims, - xqmstats.xs_qm_dqreclaim_misses, - xqmstats.xs_qm_dquot_dups, - xqmstats.xs_qm_dqcachemisses, - xqmstats.xs_qm_dqcachehits, - xqmstats.xs_qm_dqwants, - xqmstats.xs_qm_dqshake_reclaims, - xqmstats.xs_qm_dqinact_reclaims); - return 0; -} - -static int xqmstat_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, xqmstat_proc_show, NULL); -} - -static const struct file_operations xqmstat_proc_fops = { - .owner = THIS_MODULE, - .open = xqmstat_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -void -xfs_qm_init_procfs(void) -{ - proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); - proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); -} - -void -xfs_qm_cleanup_procfs(void) -{ - remove_proc_entry("fs/xfs/xqm", NULL); - remove_proc_entry("fs/xfs/xqmstat", NULL); -} diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h deleted file mode 100644 index 5b964fc0dc09..000000000000 --- a/fs/xfs/quota/xfs_qm_stats.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2002 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QM_STATS_H__ -#define __XFS_QM_STATS_H__ - -#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) - -/* - * XQM global statistics - */ -struct xqmstats { - __uint32_t xs_qm_dqreclaims; - __uint32_t xs_qm_dqreclaim_misses; - __uint32_t xs_qm_dquot_dups; - __uint32_t xs_qm_dqcachemisses; - __uint32_t xs_qm_dqcachehits; - __uint32_t xs_qm_dqwants; - __uint32_t xs_qm_dqshake_reclaims; - __uint32_t xs_qm_dqinact_reclaims; -}; - -extern struct xqmstats xqmstats; - -# define XQM_STATS_INC(count) ( (count)++ ) - -extern void xfs_qm_init_procfs(void); -extern void xfs_qm_cleanup_procfs(void); - -#else - -# define XQM_STATS_INC(count) do { } while (0) - -static inline void xfs_qm_init_procfs(void) { }; -static inline void xfs_qm_cleanup_procfs(void) { }; - -#endif - -#endif /* __XFS_QM_STATS_H__ */ diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c deleted file mode 100644 index 609246f42e6c..000000000000 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - * Copyright (c) 2000-2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include - -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_utils.h" -#include "xfs_qm.h" -#include "xfs_trace.h" - -STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); -STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, - uint); -STATIC uint xfs_qm_export_flags(uint); -STATIC uint xfs_qm_export_qtype_flags(uint); -STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, - fs_disk_quota_t *); - - -/* - * Turn off quota accounting and/or enforcement for all udquots and/or - * gdquots. Called only at unmount time. - * - * This assumes that there are no dquots of this file system cached - * incore, and modifies the ondisk dquot directly. Therefore, for example, - * it is an error to call this twice, without purging the cache. - */ -int -xfs_qm_scall_quotaoff( - xfs_mount_t *mp, - uint flags) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - uint dqtype; - int error; - uint inactivate_flags; - xfs_qoff_logitem_t *qoffstart; - int nculprits; - - /* - * No file system can have quotas enabled on disk but not in core. - * Note that quota utilities (like quotaoff) _expect_ - * errno == EEXIST here. - */ - if ((mp->m_qflags & flags) == 0) - return XFS_ERROR(EEXIST); - error = 0; - - flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); - - /* - * We don't want to deal with two quotaoffs messing up each other, - * so we're going to serialize it. quotaoff isn't exactly a performance - * critical thing. - * If quotaoff, then we must be dealing with the root filesystem. - */ - ASSERT(q); - mutex_lock(&q->qi_quotaofflock); - - /* - * If we're just turning off quota enforcement, change mp and go. - */ - if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { - mp->m_qflags &= ~(flags); - - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = mp->m_qflags; - spin_unlock(&mp->m_sb_lock); - mutex_unlock(&q->qi_quotaofflock); - - /* XXX what to do if error ? Revert back to old vals incore ? */ - error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); - return (error); - } - - dqtype = 0; - inactivate_flags = 0; - /* - * If accounting is off, we must turn enforcement off, clear the - * quota 'CHKD' certificate to make it known that we have to - * do a quotacheck the next time this quota is turned on. - */ - if (flags & XFS_UQUOTA_ACCT) { - dqtype |= XFS_QMOPT_UQUOTA; - flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); - inactivate_flags |= XFS_UQUOTA_ACTIVE; - } - if (flags & XFS_GQUOTA_ACCT) { - dqtype |= XFS_QMOPT_GQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); - inactivate_flags |= XFS_GQUOTA_ACTIVE; - } else if (flags & XFS_PQUOTA_ACCT) { - dqtype |= XFS_QMOPT_PQUOTA; - flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); - inactivate_flags |= XFS_PQUOTA_ACTIVE; - } - - /* - * Nothing to do? Don't complain. This happens when we're just - * turning off quota enforcement. - */ - if ((mp->m_qflags & flags) == 0) - goto out_unlock; - - /* - * Write the LI_QUOTAOFF log record, and do SB changes atomically, - * and synchronously. If we fail to write, we should abort the - * operation as it cannot be recovered safely if we crash. - */ - error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); - if (error) - goto out_unlock; - - /* - * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct - * to take care of the race between dqget and quotaoff. We don't take - * any special locks to reset these bits. All processes need to check - * these bits *after* taking inode lock(s) to see if the particular - * quota type is in the process of being turned off. If *ACTIVE, it is - * guaranteed that all dquot structures and all quotainode ptrs will all - * stay valid as long as that inode is kept locked. - * - * There is no turning back after this. - */ - mp->m_qflags &= ~inactivate_flags; - - /* - * Give back all the dquot reference(s) held by inodes. - * Here we go thru every single incore inode in this file system, and - * do a dqrele on the i_udquot/i_gdquot that it may have. - * Essentially, as long as somebody has an inode locked, this guarantees - * that quotas will not be turned off. This is handy because in a - * transaction once we lock the inode(s) and check for quotaon, we can - * depend on the quota inodes (and other things) being valid as long as - * we keep the lock(s). - */ - xfs_qm_dqrele_all_inodes(mp, flags); - - /* - * Next we make the changes in the quota flag in the mount struct. - * This isn't protected by a particular lock directly, because we - * don't want to take a mrlock every time we depend on quotas being on. - */ - mp->m_qflags &= ~(flags); - - /* - * Go through all the dquots of this file system and purge them, - * according to what was turned off. We may not be able to get rid - * of all dquots, because dquots can have temporary references that - * are not attached to inodes. eg. xfs_setattr, xfs_create. - * So, if we couldn't purge all the dquots from the filesystem, - * we can't get rid of the incore data structures. - */ - while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) - delay(10 * nculprits); - - /* - * Transactions that had started before ACTIVE state bit was cleared - * could have logged many dquots, so they'd have higher LSNs than - * the first QUOTAOFF log record does. If we happen to crash when - * the tail of the log has gone past the QUOTAOFF record, but - * before the last dquot modification, those dquots __will__ - * recover, and that's not good. - * - * So, we have QUOTAOFF start and end logitems; the start - * logitem won't get overwritten until the end logitem appears... - */ - error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); - if (error) { - /* We're screwed now. Shutdown is the only option. */ - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); - goto out_unlock; - } - - /* - * If quotas is completely disabled, close shop. - */ - if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || - ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { - mutex_unlock(&q->qi_quotaofflock); - xfs_qm_destroy_quotainfo(mp); - return (0); - } - - /* - * Release our quotainode references if we don't need them anymore. - */ - if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { - IRELE(q->qi_uquotaip); - q->qi_uquotaip = NULL; - } - if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { - IRELE(q->qi_gquotaip); - q->qi_gquotaip = NULL; - } - -out_unlock: - mutex_unlock(&q->qi_quotaofflock); - return error; -} - -STATIC int -xfs_qm_scall_trunc_qfile( - struct xfs_mount *mp, - xfs_ino_t ino) -{ - struct xfs_inode *ip; - struct xfs_trans *tp; - int error; - - if (ino == NULLFSINO) - return 0; - - error = xfs_iget(mp, NULL, ino, 0, 0, &ip); - if (error) - return error; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); - error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, - XFS_ITRUNCATE_LOG_COUNT); - if (error) { - xfs_trans_cancel(tp, 0); - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - goto out_put; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - xfs_trans_ijoin(tp, ip); - - error = xfs_itruncate_data(&tp, ip, 0); - if (error) { - xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT); - goto out_unlock; - } - - xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - -out_unlock: - xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); -out_put: - IRELE(ip); - return error; -} - -int -xfs_qm_scall_trunc_qfiles( - xfs_mount_t *mp, - uint flags) -{ - int error = 0, error2 = 0; - - if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { - xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", - __func__, flags, mp->m_qflags); - return XFS_ERROR(EINVAL); - } - - if (flags & XFS_DQ_USER) - error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); - if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) - error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); - - return error ? error : error2; -} - -/* - * Switch on (a given) quota enforcement for a filesystem. This takes - * effect immediately. - * (Switching on quota accounting must be done at mount time.) - */ -int -xfs_qm_scall_quotaon( - xfs_mount_t *mp, - uint flags) -{ - int error; - uint qf; - __int64_t sbflags; - - flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); - /* - * Switching on quota accounting must be done at mount time. - */ - flags &= ~(XFS_ALL_QUOTA_ACCT); - - sbflags = 0; - - if (flags == 0) { - xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", - __func__, mp->m_qflags); - return XFS_ERROR(EINVAL); - } - - /* No fs can turn on quotas with a delayed effect */ - ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); - - /* - * Can't enforce without accounting. We check the superblock - * qflags here instead of m_qflags because rootfs can have - * quota acct on ondisk without m_qflags' knowing. - */ - if (((flags & XFS_UQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && - (flags & XFS_UQUOTA_ENFD)) - || - ((flags & XFS_PQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && - (flags & XFS_GQUOTA_ACCT) == 0 && - (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && - (flags & XFS_OQUOTA_ENFD))) { - xfs_debug(mp, - "%s: Can't enforce without acct, flags=%x sbflags=%x\n", - __func__, flags, mp->m_sb.sb_qflags); - return XFS_ERROR(EINVAL); - } - /* - * If everything's up to-date incore, then don't waste time. - */ - if ((mp->m_qflags & flags) == flags) - return XFS_ERROR(EEXIST); - - /* - * Change sb_qflags on disk but not incore mp->qflags - * if this is the root filesystem. - */ - spin_lock(&mp->m_sb_lock); - qf = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = qf | flags; - spin_unlock(&mp->m_sb_lock); - - /* - * There's nothing to change if it's the same. - */ - if ((qf & flags) == flags && sbflags == 0) - return XFS_ERROR(EEXIST); - sbflags |= XFS_SB_QFLAGS; - - if ((error = xfs_qm_write_sb_changes(mp, sbflags))) - return (error); - /* - * If we aren't trying to switch on quota enforcement, we are done. - */ - if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != - (mp->m_qflags & XFS_UQUOTA_ACCT)) || - ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != - (mp->m_qflags & XFS_PQUOTA_ACCT)) || - ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != - (mp->m_qflags & XFS_GQUOTA_ACCT)) || - (flags & XFS_ALL_QUOTA_ENFD) == 0) - return (0); - - if (! XFS_IS_QUOTA_RUNNING(mp)) - return XFS_ERROR(ESRCH); - - /* - * Switch on quota enforcement in core. - */ - mutex_lock(&mp->m_quotainfo->qi_quotaofflock); - mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); - mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); - - return (0); -} - - -/* - * Return quota status information, such as uquota-off, enforcements, etc. - */ -int -xfs_qm_scall_getqstat( - struct xfs_mount *mp, - struct fs_quota_stat *out) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - struct xfs_inode *uip, *gip; - boolean_t tempuqip, tempgqip; - - uip = gip = NULL; - tempuqip = tempgqip = B_FALSE; - memset(out, 0, sizeof(fs_quota_stat_t)); - - out->qs_version = FS_QSTAT_VERSION; - if (!xfs_sb_version_hasquota(&mp->m_sb)) { - out->qs_uquota.qfs_ino = NULLFSINO; - out->qs_gquota.qfs_ino = NULLFSINO; - return (0); - } - out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & - (XFS_ALL_QUOTA_ACCT| - XFS_ALL_QUOTA_ENFD)); - out->qs_pad = 0; - out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; - out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; - - if (q) { - uip = q->qi_uquotaip; - gip = q->qi_gquotaip; - } - if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, - 0, 0, &uip) == 0) - tempuqip = B_TRUE; - } - if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { - if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, - 0, 0, &gip) == 0) - tempgqip = B_TRUE; - } - if (uip) { - out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; - out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; - if (tempuqip) - IRELE(uip); - } - if (gip) { - out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; - out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; - if (tempgqip) - IRELE(gip); - } - if (q) { - out->qs_incoredqs = q->qi_dquots; - out->qs_btimelimit = q->qi_btimelimit; - out->qs_itimelimit = q->qi_itimelimit; - out->qs_rtbtimelimit = q->qi_rtbtimelimit; - out->qs_bwarnlimit = q->qi_bwarnlimit; - out->qs_iwarnlimit = q->qi_iwarnlimit; - } - return 0; -} - -#define XFS_DQ_MASK \ - (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) - -/* - * Adjust quota limits, and start/stop timers accordingly. - */ -int -xfs_qm_scall_setqlim( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - fs_disk_quota_t *newlim) -{ - struct xfs_quotainfo *q = mp->m_quotainfo; - xfs_disk_dquot_t *ddq; - xfs_dquot_t *dqp; - xfs_trans_t *tp; - int error; - xfs_qcnt_t hard, soft; - - if (newlim->d_fieldmask & ~XFS_DQ_MASK) - return EINVAL; - if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) - return 0; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return (error); - } - - /* - * We don't want to race with a quotaoff so take the quotaoff lock. - * (We don't hold an inode lock, so there's nothing else to stop - * a quotaoff from happening). (XXXThis doesn't currently happen - * because we take the vfslock before calling xfs_qm_sysent). - */ - mutex_lock(&q->qi_quotaofflock); - - /* - * Get the dquot (locked), and join it to the transaction. - * Allocate the dquot if this doesn't exist. - */ - if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { - xfs_trans_cancel(tp, XFS_TRANS_ABORT); - ASSERT(error != ENOENT); - goto out_unlock; - } - xfs_trans_dqjoin(tp, dqp); - ddq = &dqp->q_core; - - /* - * Make sure that hardlimits are >= soft limits before changing. - */ - hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : - be64_to_cpu(ddq->d_blk_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : - be64_to_cpu(ddq->d_blk_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_blk_hardlimit = cpu_to_be64(hard); - ddq->d_blk_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_bhardlimit = hard; - q->qi_bsoftlimit = soft; - } - } else { - xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); - } - hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : - be64_to_cpu(ddq->d_rtb_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? - (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : - be64_to_cpu(ddq->d_rtb_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_rtb_hardlimit = cpu_to_be64(hard); - ddq->d_rtb_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_rtbhardlimit = hard; - q->qi_rtbsoftlimit = soft; - } - } else { - xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); - } - - hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? - (xfs_qcnt_t) newlim->d_ino_hardlimit : - be64_to_cpu(ddq->d_ino_hardlimit); - soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? - (xfs_qcnt_t) newlim->d_ino_softlimit : - be64_to_cpu(ddq->d_ino_softlimit); - if (hard == 0 || hard >= soft) { - ddq->d_ino_hardlimit = cpu_to_be64(hard); - ddq->d_ino_softlimit = cpu_to_be64(soft); - if (id == 0) { - q->qi_ihardlimit = hard; - q->qi_isoftlimit = soft; - } - } else { - xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); - } - - /* - * Update warnings counter(s) if requested - */ - if (newlim->d_fieldmask & FS_DQ_BWARNS) - ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); - if (newlim->d_fieldmask & FS_DQ_IWARNS) - ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); - - if (id == 0) { - /* - * Timelimits for the super user set the relative time - * the other users can be over quota for this file system. - * If it is zero a default is used. Ditto for the default - * soft and hard limit values (already done, above), and - * for warnings. - */ - if (newlim->d_fieldmask & FS_DQ_BTIMER) { - q->qi_btimelimit = newlim->d_btimer; - ddq->d_btimer = cpu_to_be32(newlim->d_btimer); - } - if (newlim->d_fieldmask & FS_DQ_ITIMER) { - q->qi_itimelimit = newlim->d_itimer; - ddq->d_itimer = cpu_to_be32(newlim->d_itimer); - } - if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { - q->qi_rtbtimelimit = newlim->d_rtbtimer; - ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); - } - if (newlim->d_fieldmask & FS_DQ_BWARNS) - q->qi_bwarnlimit = newlim->d_bwarns; - if (newlim->d_fieldmask & FS_DQ_IWARNS) - q->qi_iwarnlimit = newlim->d_iwarns; - if (newlim->d_fieldmask & FS_DQ_RTBWARNS) - q->qi_rtbwarnlimit = newlim->d_rtbwarns; - } else { - /* - * If the user is now over quota, start the timelimit. - * The user will not be 'warned'. - * Note that we keep the timers ticking, whether enforcement - * is on or off. We don't really want to bother with iterating - * over all ondisk dquots and turning the timers on/off. - */ - xfs_qm_adjust_dqtimers(mp, ddq); - } - dqp->dq_flags |= XFS_DQ_DIRTY; - xfs_trans_log_dquot(tp, dqp); - - error = xfs_trans_commit(tp, 0); - xfs_qm_dqrele(dqp); - - out_unlock: - mutex_unlock(&q->qi_quotaofflock); - return error; -} - -int -xfs_qm_scall_getquota( - xfs_mount_t *mp, - xfs_dqid_t id, - uint type, - fs_disk_quota_t *out) -{ - xfs_dquot_t *dqp; - int error; - - /* - * Try to get the dquot. We don't want it allocated on disk, so - * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't - * exist, we'll get ENOENT back. - */ - if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { - return (error); - } - - /* - * If everything's NULL, this dquot doesn't quite exist as far as - * our utility programs are concerned. - */ - if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { - xfs_qm_dqput(dqp); - return XFS_ERROR(ENOENT); - } - /* - * Convert the disk dquot to the exportable format - */ - xfs_qm_export_dquot(mp, &dqp->q_core, out); - xfs_qm_dqput(dqp); - return (error ? XFS_ERROR(EFAULT) : 0); -} - - -STATIC int -xfs_qm_log_quotaoff_end( - xfs_mount_t *mp, - xfs_qoff_logitem_t *startqoff, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); - - if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, - 0, 0, XFS_DEFAULT_LOG_COUNT))) { - xfs_trans_cancel(tp, 0); - return (error); - } - - qoffi = xfs_trans_get_qoff_item(tp, startqoff, - flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - return (error); -} - - -STATIC int -xfs_qm_log_quotaoff( - xfs_mount_t *mp, - xfs_qoff_logitem_t **qoffstartp, - uint flags) -{ - xfs_trans_t *tp; - int error; - xfs_qoff_logitem_t *qoffi=NULL; - uint oldsbqflag=0; - - tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); - if ((error = xfs_trans_reserve(tp, 0, - sizeof(xfs_qoff_logitem_t) * 2 + - mp->m_sb.sb_sectsize + 128, - 0, - 0, - XFS_DEFAULT_LOG_COUNT))) { - goto error0; - } - - qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); - xfs_trans_log_quotaoff_item(tp, qoffi); - - spin_lock(&mp->m_sb_lock); - oldsbqflag = mp->m_sb.sb_qflags; - mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; - spin_unlock(&mp->m_sb_lock); - - xfs_mod_sb(tp, XFS_SB_QFLAGS); - - /* - * We have to make sure that the transaction is secure on disk before we - * return and actually stop quota accounting. So, make it synchronous. - * We don't care about quotoff's performance. - */ - xfs_trans_set_sync(tp); - error = xfs_trans_commit(tp, 0); - -error0: - if (error) { - xfs_trans_cancel(tp, 0); - /* - * No one else is modifying sb_qflags, so this is OK. - * We still hold the quotaofflock. - */ - spin_lock(&mp->m_sb_lock); - mp->m_sb.sb_qflags = oldsbqflag; - spin_unlock(&mp->m_sb_lock); - } - *qoffstartp = qoffi; - return (error); -} - - -/* - * Translate an internal style on-disk-dquot to the exportable format. - * The main differences are that the counters/limits are all in Basic - * Blocks (BBs) instead of the internal FSBs, and all on-disk data has - * to be converted to the native endianness. - */ -STATIC void -xfs_qm_export_dquot( - xfs_mount_t *mp, - xfs_disk_dquot_t *src, - struct fs_disk_quota *dst) -{ - memset(dst, 0, sizeof(*dst)); - dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ - dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); - dst->d_id = be32_to_cpu(src->d_id); - dst->d_blk_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); - dst->d_blk_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); - dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); - dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); - dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); - dst->d_icount = be64_to_cpu(src->d_icount); - dst->d_btimer = be32_to_cpu(src->d_btimer); - dst->d_itimer = be32_to_cpu(src->d_itimer); - dst->d_iwarns = be16_to_cpu(src->d_iwarns); - dst->d_bwarns = be16_to_cpu(src->d_bwarns); - dst->d_rtb_hardlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); - dst->d_rtb_softlimit = - XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); - dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); - dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); - dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); - - /* - * Internally, we don't reset all the timers when quota enforcement - * gets turned off. No need to confuse the user level code, - * so return zeroes in that case. - */ - if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || - (!XFS_IS_OQUOTA_ENFORCED(mp) && - (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { - dst->d_btimer = 0; - dst->d_itimer = 0; - dst->d_rtbtimer = 0; - } - -#ifdef DEBUG - if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || - (XFS_IS_OQUOTA_ENFORCED(mp) && - (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && - dst->d_id != 0) { - if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && - (dst->d_blk_softlimit > 0)) { - ASSERT(dst->d_btimer != 0); - } - if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && - (dst->d_ino_softlimit > 0)) { - ASSERT(dst->d_itimer != 0); - } - } -#endif -} - -STATIC uint -xfs_qm_export_qtype_flags( - uint flags) -{ - /* - * Can't be more than one, or none. - */ - ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != - (FS_PROJ_QUOTA | FS_USER_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != - (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != - (FS_USER_QUOTA | FS_GROUP_QUOTA)); - ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); - - return (flags & XFS_DQ_USER) ? - FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? - FS_PROJ_QUOTA : FS_GROUP_QUOTA; -} - -STATIC uint -xfs_qm_export_flags( - uint flags) -{ - uint uflags; - - uflags = 0; - if (flags & XFS_UQUOTA_ACCT) - uflags |= FS_QUOTA_UDQ_ACCT; - if (flags & XFS_PQUOTA_ACCT) - uflags |= FS_QUOTA_PDQ_ACCT; - if (flags & XFS_GQUOTA_ACCT) - uflags |= FS_QUOTA_GDQ_ACCT; - if (flags & XFS_UQUOTA_ENFD) - uflags |= FS_QUOTA_UDQ_ENFD; - if (flags & (XFS_OQUOTA_ENFD)) { - uflags |= (flags & XFS_GQUOTA_ACCT) ? - FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; - } - return (uflags); -} - - -STATIC int -xfs_dqrele_inode( - struct xfs_inode *ip, - struct xfs_perag *pag, - int flags) -{ - /* skip quota inodes */ - if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || - ip == ip->i_mount->m_quotainfo->qi_gquotaip) { - ASSERT(ip->i_udquot == NULL); - ASSERT(ip->i_gdquot == NULL); - return 0; - } - - xfs_ilock(ip, XFS_ILOCK_EXCL); - if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { - xfs_qm_dqrele(ip->i_udquot); - ip->i_udquot = NULL; - } - if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { - xfs_qm_dqrele(ip->i_gdquot); - ip->i_gdquot = NULL; - } - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return 0; -} - - -/* - * Go thru all the inodes in the file system, releasing their dquots. - * - * Note that the mount structure gets modified to indicate that quotas are off - * AFTER this, in the case of quotaoff. - */ -void -xfs_qm_dqrele_all_inodes( - struct xfs_mount *mp, - uint flags) -{ - ASSERT(mp->m_quotainfo); - xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); -} diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h deleted file mode 100644 index 94a3d927d716..000000000000 --- a/fs/xfs/quota/xfs_quota_priv.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_QUOTA_PRIV_H__ -#define __XFS_QUOTA_PRIV_H__ - -/* - * Number of bmaps that we ask from bmapi when doing a quotacheck. - * We make this restriction to keep the memory usage to a minimum. - */ -#define XFS_DQITER_MAP_SIZE 10 - -/* - * Hash into a bucket in the dquot hash table, based on . - */ -#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ - (__psunsigned_t)(id)) & \ - (xfs_Gqm->qm_dqhashmask - 1)) -#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ - (xfs_Gqm->qm_usr_dqhtable + \ - XFS_DQ_HASHVAL(mp, id)) : \ - (xfs_Gqm->qm_grp_dqhtable + \ - XFS_DQ_HASHVAL(mp, id))) -#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ - !dqp->q_core.d_blk_hardlimit && \ - !dqp->q_core.d_blk_softlimit && \ - !dqp->q_core.d_rtb_hardlimit && \ - !dqp->q_core.d_rtb_softlimit && \ - !dqp->q_core.d_ino_hardlimit && \ - !dqp->q_core.d_ino_softlimit && \ - !dqp->q_core.d_bcount && \ - !dqp->q_core.d_rtbcount && \ - !dqp->q_core.d_icount) - -#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ - (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ - (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) - -#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c deleted file mode 100644 index 4d00ee67792d..000000000000 --- a/fs/xfs/quota/xfs_trans_dquot.c +++ /dev/null @@ -1,890 +0,0 @@ -/* - * Copyright (c) 2000-2002 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_fs.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h" -#include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" -#include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_trans_priv.h" -#include "xfs_qm.h" - -STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); - -/* - * Add the locked dquot to the transaction. - * The dquot must be locked, and it cannot be associated with any - * transaction. - */ -void -xfs_trans_dqjoin( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - ASSERT(dqp->q_transp != tp); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_logitem.qli_dquot == dqp); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); - - /* - * Initialize d_transp so we can later determine if this dquot is - * associated with this transaction. - */ - dqp->q_transp = tp; -} - - -/* - * This is called to mark the dquot as needing - * to be logged when the transaction is committed. The dquot must - * already be associated with the given transaction. - * Note that it marks the entire transaction as dirty. In the ordinary - * case, this gets called via xfs_trans_commit, after the transaction - * is already dirty. However, there's nothing stop this from getting - * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY - * flag. - */ -void -xfs_trans_log_dquot( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - ASSERT(dqp->q_transp == tp); - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - - tp->t_flags |= XFS_TRANS_DIRTY; - dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; -} - -/* - * Carry forward whatever is left of the quota blk reservation to - * the spanky new transaction - */ -void -xfs_trans_dup_dqinfo( - xfs_trans_t *otp, - xfs_trans_t *ntp) -{ - xfs_dqtrx_t *oq, *nq; - int i,j; - xfs_dqtrx_t *oqa, *nqa; - - if (!otp->t_dqinfo) - return; - - xfs_trans_alloc_dqinfo(ntp); - oqa = otp->t_dqinfo->dqa_usrdquots; - nqa = ntp->t_dqinfo->dqa_usrdquots; - - /* - * Because the quota blk reservation is carried forward, - * it is also necessary to carry forward the DQ_DIRTY flag. - */ - if(otp->t_flags & XFS_TRANS_DQ_DIRTY) - ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - - for (j = 0; j < 2; j++) { - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - if (oqa[i].qt_dquot == NULL) - break; - oq = &oqa[i]; - nq = &nqa[i]; - - nq->qt_dquot = oq->qt_dquot; - nq->qt_bcount_delta = nq->qt_icount_delta = 0; - nq->qt_rtbcount_delta = 0; - - /* - * Transfer whatever is left of the reservations. - */ - nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; - oq->qt_blk_res = oq->qt_blk_res_used; - - nq->qt_rtblk_res = oq->qt_rtblk_res - - oq->qt_rtblk_res_used; - oq->qt_rtblk_res = oq->qt_rtblk_res_used; - - nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; - oq->qt_ino_res = oq->qt_ino_res_used; - - } - oqa = otp->t_dqinfo->dqa_grpdquots; - nqa = ntp->t_dqinfo->dqa_grpdquots; - } -} - -/* - * Wrap around mod_dquot to account for both user and group quotas. - */ -void -xfs_trans_mod_dquot_byino( - xfs_trans_t *tp, - xfs_inode_t *ip, - uint field, - long delta) -{ - xfs_mount_t *mp = tp->t_mountp; - - if (!XFS_IS_QUOTA_RUNNING(mp) || - !XFS_IS_QUOTA_ON(mp) || - ip->i_ino == mp->m_sb.sb_uquotino || - ip->i_ino == mp->m_sb.sb_gquotino) - return; - - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) - (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); - if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) - (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); -} - -STATIC xfs_dqtrx_t * -xfs_trans_get_dqtrx( - xfs_trans_t *tp, - xfs_dquot_t *dqp) -{ - int i; - xfs_dqtrx_t *qa; - - qa = XFS_QM_ISUDQ(dqp) ? - tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; - - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - if (qa[i].qt_dquot == NULL || - qa[i].qt_dquot == dqp) - return &qa[i]; - } - - return NULL; -} - -/* - * Make the changes in the transaction structure. - * The moral equivalent to xfs_trans_mod_sb(). - * We don't touch any fields in the dquot, so we don't care - * if it's locked or not (most of the time it won't be). - */ -void -xfs_trans_mod_dquot( - xfs_trans_t *tp, - xfs_dquot_t *dqp, - uint field, - long delta) -{ - xfs_dqtrx_t *qtrx; - - ASSERT(tp); - ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); - qtrx = NULL; - - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - /* - * Find either the first free slot or the slot that belongs - * to this dquot. - */ - qtrx = xfs_trans_get_dqtrx(tp, dqp); - ASSERT(qtrx); - if (qtrx->qt_dquot == NULL) - qtrx->qt_dquot = dqp; - - switch (field) { - - /* - * regular disk blk reservation - */ - case XFS_TRANS_DQ_RES_BLKS: - qtrx->qt_blk_res += (ulong)delta; - break; - - /* - * inode reservation - */ - case XFS_TRANS_DQ_RES_INOS: - qtrx->qt_ino_res += (ulong)delta; - break; - - /* - * disk blocks used. - */ - case XFS_TRANS_DQ_BCOUNT: - if (qtrx->qt_blk_res && delta > 0) { - qtrx->qt_blk_res_used += (ulong)delta; - ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); - } - qtrx->qt_bcount_delta += delta; - break; - - case XFS_TRANS_DQ_DELBCOUNT: - qtrx->qt_delbcnt_delta += delta; - break; - - /* - * Inode Count - */ - case XFS_TRANS_DQ_ICOUNT: - if (qtrx->qt_ino_res && delta > 0) { - qtrx->qt_ino_res_used += (ulong)delta; - ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); - } - qtrx->qt_icount_delta += delta; - break; - - /* - * rtblk reservation - */ - case XFS_TRANS_DQ_RES_RTBLKS: - qtrx->qt_rtblk_res += (ulong)delta; - break; - - /* - * rtblk count - */ - case XFS_TRANS_DQ_RTBCOUNT: - if (qtrx->qt_rtblk_res && delta > 0) { - qtrx->qt_rtblk_res_used += (ulong)delta; - ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); - } - qtrx->qt_rtbcount_delta += delta; - break; - - case XFS_TRANS_DQ_DELRTBCOUNT: - qtrx->qt_delrtb_delta += delta; - break; - - default: - ASSERT(0); - } - tp->t_flags |= XFS_TRANS_DQ_DIRTY; -} - - -/* - * Given an array of dqtrx structures, lock all the dquots associated - * and join them to the transaction, provided they have been modified. - * We know that the highest number of dquots (of one type - usr OR grp), - * involved in a transaction is 2 and that both usr and grp combined - 3. - * So, we don't attempt to make this very generic. - */ -STATIC void -xfs_trans_dqlockedjoin( - xfs_trans_t *tp, - xfs_dqtrx_t *q) -{ - ASSERT(q[0].qt_dquot != NULL); - if (q[1].qt_dquot == NULL) { - xfs_dqlock(q[0].qt_dquot); - xfs_trans_dqjoin(tp, q[0].qt_dquot); - } else { - ASSERT(XFS_QM_TRANS_MAXDQS == 2); - xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); - xfs_trans_dqjoin(tp, q[0].qt_dquot); - xfs_trans_dqjoin(tp, q[1].qt_dquot); - } -} - - -/* - * Called by xfs_trans_commit() and similar in spirit to - * xfs_trans_apply_sb_deltas(). - * Go thru all the dquots belonging to this transaction and modify the - * INCORE dquot to reflect the actual usages. - * Unreserve just the reservations done by this transaction. - * dquot is still left locked at exit. - */ -void -xfs_trans_apply_dquot_deltas( - xfs_trans_t *tp) -{ - int i, j; - xfs_dquot_t *dqp; - xfs_dqtrx_t *qtrx, *qa; - xfs_disk_dquot_t *d; - long totalbdelta; - long totalrtbdelta; - - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) - return; - - ASSERT(tp->t_dqinfo); - qa = tp->t_dqinfo->dqa_usrdquots; - for (j = 0; j < 2; j++) { - if (qa[0].qt_dquot == NULL) { - qa = tp->t_dqinfo->dqa_grpdquots; - continue; - } - - /* - * Lock all of the dquots and join them to the transaction. - */ - xfs_trans_dqlockedjoin(tp, qa); - - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - qtrx = &qa[i]; - /* - * The array of dquots is filled - * sequentially, not sparsely. - */ - if ((dqp = qtrx->qt_dquot) == NULL) - break; - - ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(dqp->q_transp == tp); - - /* - * adjust the actual number of blocks used - */ - d = &dqp->q_core; - - /* - * The issue here is - sometimes we don't make a blkquota - * reservation intentionally to be fair to users - * (when the amount is small). On the other hand, - * delayed allocs do make reservations, but that's - * outside of a transaction, so we have no - * idea how much was really reserved. - * So, here we've accumulated delayed allocation blks and - * non-delay blks. The assumption is that the - * delayed ones are always reserved (outside of a - * transaction), and the others may or may not have - * quota reservations. - */ - totalbdelta = qtrx->qt_bcount_delta + - qtrx->qt_delbcnt_delta; - totalrtbdelta = qtrx->qt_rtbcount_delta + - qtrx->qt_delrtb_delta; -#ifdef DEBUG - if (totalbdelta < 0) - ASSERT(be64_to_cpu(d->d_bcount) >= - -totalbdelta); - - if (totalrtbdelta < 0) - ASSERT(be64_to_cpu(d->d_rtbcount) >= - -totalrtbdelta); - - if (qtrx->qt_icount_delta < 0) - ASSERT(be64_to_cpu(d->d_icount) >= - -qtrx->qt_icount_delta); -#endif - if (totalbdelta) - be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); - - if (qtrx->qt_icount_delta) - be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); - - if (totalrtbdelta) - be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); - - /* - * Get any default limits in use. - * Start/reset the timer(s) if needed. - */ - if (d->d_id) { - xfs_qm_adjust_dqlimits(tp->t_mountp, d); - xfs_qm_adjust_dqtimers(tp->t_mountp, d); - } - - dqp->dq_flags |= XFS_DQ_DIRTY; - /* - * add this to the list of items to get logged - */ - xfs_trans_log_dquot(tp, dqp); - /* - * Take off what's left of the original reservation. - * In case of delayed allocations, there's no - * reservation that a transaction structure knows of. - */ - if (qtrx->qt_blk_res != 0) { - if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { - if (qtrx->qt_blk_res > - qtrx->qt_blk_res_used) - dqp->q_res_bcount -= (xfs_qcnt_t) - (qtrx->qt_blk_res - - qtrx->qt_blk_res_used); - else - dqp->q_res_bcount -= (xfs_qcnt_t) - (qtrx->qt_blk_res_used - - qtrx->qt_blk_res); - } - } else { - /* - * These blks were never reserved, either inside - * a transaction or outside one (in a delayed - * allocation). Also, this isn't always a - * negative number since we sometimes - * deliberately skip quota reservations. - */ - if (qtrx->qt_bcount_delta) { - dqp->q_res_bcount += - (xfs_qcnt_t)qtrx->qt_bcount_delta; - } - } - /* - * Adjust the RT reservation. - */ - if (qtrx->qt_rtblk_res != 0) { - if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { - if (qtrx->qt_rtblk_res > - qtrx->qt_rtblk_res_used) - dqp->q_res_rtbcount -= (xfs_qcnt_t) - (qtrx->qt_rtblk_res - - qtrx->qt_rtblk_res_used); - else - dqp->q_res_rtbcount -= (xfs_qcnt_t) - (qtrx->qt_rtblk_res_used - - qtrx->qt_rtblk_res); - } - } else { - if (qtrx->qt_rtbcount_delta) - dqp->q_res_rtbcount += - (xfs_qcnt_t)qtrx->qt_rtbcount_delta; - } - - /* - * Adjust the inode reservation. - */ - if (qtrx->qt_ino_res != 0) { - ASSERT(qtrx->qt_ino_res >= - qtrx->qt_ino_res_used); - if (qtrx->qt_ino_res > qtrx->qt_ino_res_used) - dqp->q_res_icount -= (xfs_qcnt_t) - (qtrx->qt_ino_res - - qtrx->qt_ino_res_used); - } else { - if (qtrx->qt_icount_delta) - dqp->q_res_icount += - (xfs_qcnt_t)qtrx->qt_icount_delta; - } - - ASSERT(dqp->q_res_bcount >= - be64_to_cpu(dqp->q_core.d_bcount)); - ASSERT(dqp->q_res_icount >= - be64_to_cpu(dqp->q_core.d_icount)); - ASSERT(dqp->q_res_rtbcount >= - be64_to_cpu(dqp->q_core.d_rtbcount)); - } - /* - * Do the group quotas next - */ - qa = tp->t_dqinfo->dqa_grpdquots; - } -} - -/* - * Release the reservations, and adjust the dquots accordingly. - * This is called only when the transaction is being aborted. If by - * any chance we have done dquot modifications incore (ie. deltas) already, - * we simply throw those away, since that's the expected behavior - * when a transaction is curtailed without a commit. - */ -void -xfs_trans_unreserve_and_mod_dquots( - xfs_trans_t *tp) -{ - int i, j; - xfs_dquot_t *dqp; - xfs_dqtrx_t *qtrx, *qa; - boolean_t locked; - - if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) - return; - - qa = tp->t_dqinfo->dqa_usrdquots; - - for (j = 0; j < 2; j++) { - for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { - qtrx = &qa[i]; - /* - * We assume that the array of dquots is filled - * sequentially, not sparsely. - */ - if ((dqp = qtrx->qt_dquot) == NULL) - break; - /* - * Unreserve the original reservation. We don't care - * about the number of blocks used field, or deltas. - * Also we don't bother to zero the fields. - */ - locked = B_FALSE; - if (qtrx->qt_blk_res) { - xfs_dqlock(dqp); - locked = B_TRUE; - dqp->q_res_bcount -= - (xfs_qcnt_t)qtrx->qt_blk_res; - } - if (qtrx->qt_ino_res) { - if (!locked) { - xfs_dqlock(dqp); - locked = B_TRUE; - } - dqp->q_res_icount -= - (xfs_qcnt_t)qtrx->qt_ino_res; - } - - if (qtrx->qt_rtblk_res) { - if (!locked) { - xfs_dqlock(dqp); - locked = B_TRUE; - } - dqp->q_res_rtbcount -= - (xfs_qcnt_t)qtrx->qt_rtblk_res; - } - if (locked) - xfs_dqunlock(dqp); - - } - qa = tp->t_dqinfo->dqa_grpdquots; - } -} - -STATIC void -xfs_quota_warn( - struct xfs_mount *mp, - struct xfs_dquot *dqp, - int type) -{ - /* no warnings for project quotas - we just return ENOSPC later */ - if (dqp->dq_flags & XFS_DQ_PROJ) - return; - quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, - be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, - type); -} - -/* - * This reserves disk blocks and inodes against a dquot. - * Flags indicate if the dquot is to be locked here and also - * if the blk reservation is for RT or regular blocks. - * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. - */ -STATIC int -xfs_trans_dqresv( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *dqp, - long nblks, - long ninos, - uint flags) -{ - xfs_qcnt_t hardlimit; - xfs_qcnt_t softlimit; - time_t timer; - xfs_qwarncnt_t warns; - xfs_qwarncnt_t warnlimit; - xfs_qcnt_t count; - xfs_qcnt_t *resbcountp; - xfs_quotainfo_t *q = mp->m_quotainfo; - - - xfs_dqlock(dqp); - - if (flags & XFS_TRANS_DQ_RES_BLKS) { - hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); - if (!hardlimit) - hardlimit = q->qi_bhardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); - if (!softlimit) - softlimit = q->qi_bsoftlimit; - timer = be32_to_cpu(dqp->q_core.d_btimer); - warns = be16_to_cpu(dqp->q_core.d_bwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; - resbcountp = &dqp->q_res_bcount; - } else { - ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); - hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); - if (!hardlimit) - hardlimit = q->qi_rtbhardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); - if (!softlimit) - softlimit = q->qi_rtbsoftlimit; - timer = be32_to_cpu(dqp->q_core.d_rtbtimer); - warns = be16_to_cpu(dqp->q_core.d_rtbwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; - resbcountp = &dqp->q_res_rtbcount; - } - - if ((flags & XFS_QMOPT_FORCE_RES) == 0 && - dqp->q_core.d_id && - ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || - (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && - (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { - if (nblks > 0) { - /* - * dquot is locked already. See if we'd go over the - * hardlimit or exceed the timelimit if we allocate - * nblks. - */ - if (hardlimit > 0ULL && - hardlimit <= nblks + *resbcountp) { - xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); - goto error_return; - } - if (softlimit > 0ULL && - softlimit <= nblks + *resbcountp) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - xfs_quota_warn(mp, dqp, - QUOTA_NL_BSOFTLONGWARN); - goto error_return; - } - - xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); - } - } - if (ninos > 0) { - count = be64_to_cpu(dqp->q_core.d_icount); - timer = be32_to_cpu(dqp->q_core.d_itimer); - warns = be16_to_cpu(dqp->q_core.d_iwarns); - warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; - hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); - if (!hardlimit) - hardlimit = q->qi_ihardlimit; - softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); - if (!softlimit) - softlimit = q->qi_isoftlimit; - - if (hardlimit > 0ULL && count >= hardlimit) { - xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); - goto error_return; - } - if (softlimit > 0ULL && count >= softlimit) { - if ((timer != 0 && get_seconds() > timer) || - (warns != 0 && warns >= warnlimit)) { - xfs_quota_warn(mp, dqp, - QUOTA_NL_ISOFTLONGWARN); - goto error_return; - } - xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); - } - } - } - - /* - * Change the reservation, but not the actual usage. - * Note that q_res_bcount = q_core.d_bcount + resv - */ - (*resbcountp) += (xfs_qcnt_t)nblks; - if (ninos != 0) - dqp->q_res_icount += (xfs_qcnt_t)ninos; - - /* - * note the reservation amt in the trans struct too, - * so that the transaction knows how much was reserved by - * it against this particular dquot. - * We don't do this when we are reserving for a delayed allocation, - * because we don't have the luxury of a transaction envelope then. - */ - if (tp) { - ASSERT(tp->t_dqinfo); - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - if (nblks != 0) - xfs_trans_mod_dquot(tp, dqp, - flags & XFS_QMOPT_RESBLK_MASK, - nblks); - if (ninos != 0) - xfs_trans_mod_dquot(tp, dqp, - XFS_TRANS_DQ_RES_INOS, - ninos); - } - ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount)); - ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); - ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); - - xfs_dqunlock(dqp); - return 0; - -error_return: - xfs_dqunlock(dqp); - if (flags & XFS_QMOPT_ENOSPC) - return ENOSPC; - return EDQUOT; -} - - -/* - * Given dquot(s), make disk block and/or inode reservations against them. - * The fact that this does the reservation against both the usr and - * grp/prj quotas is important, because this follows a both-or-nothing - * approach. - * - * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. - * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. - * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks - * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks - * dquots are unlocked on return, if they were not locked by caller. - */ -int -xfs_trans_reserve_quota_bydquots( - xfs_trans_t *tp, - xfs_mount_t *mp, - xfs_dquot_t *udqp, - xfs_dquot_t *gdqp, - long nblks, - long ninos, - uint flags) -{ - int resvd = 0, error; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - - if (tp && tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - - if (udqp) { - error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, - (flags & ~XFS_QMOPT_ENOSPC)); - if (error) - return error; - resvd = 1; - } - - if (gdqp) { - error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); - if (error) { - /* - * can't do it, so backout previous reservation - */ - if (resvd) { - flags |= XFS_QMOPT_FORCE_RES; - xfs_trans_dqresv(tp, mp, udqp, - -nblks, -ninos, flags); - } - return error; - } - } - - /* - * Didn't change anything critical, so, no need to log - */ - return 0; -} - - -/* - * Lock the dquot and change the reservation if we can. - * This doesn't change the actual usage, just the reservation. - * The inode sent in is locked. - */ -int -xfs_trans_reserve_quota_nblks( - struct xfs_trans *tp, - struct xfs_inode *ip, - long nblks, - long ninos, - uint flags) -{ - struct xfs_mount *mp = ip->i_mount; - - if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) - return 0; - if (XFS_IS_PQUOTA_ON(mp)) - flags |= XFS_QMOPT_ENOSPC; - - ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); - ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); - - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); - ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == - XFS_TRANS_DQ_RES_RTBLKS || - (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == - XFS_TRANS_DQ_RES_BLKS); - - /* - * Reserve nblks against these dquots, with trans as the mediator. - */ - return xfs_trans_reserve_quota_bydquots(tp, mp, - ip->i_udquot, ip->i_gdquot, - nblks, ninos, flags); -} - -/* - * This routine is called to allocate a quotaoff log item. - */ -xfs_qoff_logitem_t * -xfs_trans_get_qoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *startqoff, - uint flags) -{ - xfs_qoff_logitem_t *q; - - ASSERT(tp != NULL); - - q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); - ASSERT(q != NULL); - - /* - * Get a log_item_desc to point at the new item. - */ - xfs_trans_add_item(tp, &q->qql_item); - return q; -} - - -/* - * This is called to mark the quotaoff logitem as needing - * to be logged when the transaction is committed. The logitem must - * already be associated with the given transaction. - */ -void -xfs_trans_log_quotaoff_item( - xfs_trans_t *tp, - xfs_qoff_logitem_t *qlp) -{ - tp->t_flags |= XFS_TRANS_DIRTY; - qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; -} - -STATIC void -xfs_trans_alloc_dqinfo( - xfs_trans_t *tp) -{ - tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); -} - -void -xfs_trans_free_dqinfo( - xfs_trans_t *tp) -{ - if (!tp->t_dqinfo) - return; - kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); - tp->t_dqinfo = NULL; -} diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c deleted file mode 100644 index b83f76b6d410..000000000000 --- a/fs/xfs/support/uuid.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include - -/* IRIX interpretation of an uuid_t */ -typedef struct { - __be32 uu_timelow; - __be16 uu_timemid; - __be16 uu_timehi; - __be16 uu_clockseq; - __be16 uu_node[3]; -} xfs_uu_t; - -/* - * uuid_getnodeuniq - obtain the node unique fields of a UUID. - * - * This is not in any way a standard or condoned UUID function; - * it just something that's needed for user-level file handles. - */ -void -uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) -{ - xfs_uu_t *uup = (xfs_uu_t *)uuid; - - fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | - be16_to_cpu(uup->uu_timemid); - fsid[1] = be32_to_cpu(uup->uu_timelow); -} - -int -uuid_is_nil(uuid_t *uuid) -{ - int i; - char *cp = (char *)uuid; - - if (uuid == NULL) - return 0; - /* implied check of version number here... */ - for (i = 0; i < sizeof *uuid; i++) - if (*cp++) return 0; /* not nil */ - return 1; /* is nil */ -} - -int -uuid_equal(uuid_t *uuid1, uuid_t *uuid2) -{ - return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; -} diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h deleted file mode 100644 index 4732d71262cc..000000000000 --- a/fs/xfs/support/uuid.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_UUID_H__ -#define __XFS_SUPPORT_UUID_H__ - -typedef struct { - unsigned char __u_bits[16]; -} uuid_t; - -extern int uuid_is_nil(uuid_t *uuid); -extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); -extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); - -#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/time.h b/fs/xfs/time.h new file mode 100644 index 000000000000..387e695a184c --- /dev/null +++ b/fs/xfs/time.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_TIME_H__ +#define __XFS_SUPPORT_TIME_H__ + +#include +#include + +typedef struct timespec timespec_t; + +static inline void delay(long ticks) +{ + schedule_timeout_uninterruptible(ticks); +} + +static inline void nanotime(struct timespec *tvp) +{ + *tvp = CURRENT_TIME; +} + +#endif /* __XFS_SUPPORT_TIME_H__ */ diff --git a/fs/xfs/uuid.c b/fs/xfs/uuid.c new file mode 100644 index 000000000000..b83f76b6d410 --- /dev/null +++ b/fs/xfs/uuid.c @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include + +/* IRIX interpretation of an uuid_t */ +typedef struct { + __be32 uu_timelow; + __be16 uu_timemid; + __be16 uu_timehi; + __be16 uu_clockseq; + __be16 uu_node[3]; +} xfs_uu_t; + +/* + * uuid_getnodeuniq - obtain the node unique fields of a UUID. + * + * This is not in any way a standard or condoned UUID function; + * it just something that's needed for user-level file handles. + */ +void +uuid_getnodeuniq(uuid_t *uuid, int fsid [2]) +{ + xfs_uu_t *uup = (xfs_uu_t *)uuid; + + fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) | + be16_to_cpu(uup->uu_timemid); + fsid[1] = be32_to_cpu(uup->uu_timelow); +} + +int +uuid_is_nil(uuid_t *uuid) +{ + int i; + char *cp = (char *)uuid; + + if (uuid == NULL) + return 0; + /* implied check of version number here... */ + for (i = 0; i < sizeof *uuid; i++) + if (*cp++) return 0; /* not nil */ + return 1; /* is nil */ +} + +int +uuid_equal(uuid_t *uuid1, uuid_t *uuid2) +{ + return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1; +} diff --git a/fs/xfs/uuid.h b/fs/xfs/uuid.h new file mode 100644 index 000000000000..4732d71262cc --- /dev/null +++ b/fs/xfs/uuid.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPPORT_UUID_H__ +#define __XFS_SUPPORT_UUID_H__ + +typedef struct { + unsigned char __u_bits[16]; +} uuid_t; + +extern int uuid_is_nil(uuid_t *uuid); +extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); +extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]); + +#endif /* __XFS_SUPPORT_UUID_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c new file mode 100644 index 000000000000..b6c4b3795c4a --- /dev/null +++ b/fs/xfs/xfs_acl.c @@ -0,0 +1,420 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_acl.h" +#include "xfs_attr.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_vnodeops.h" +#include "xfs_trace.h" +#include +#include +#include + + +/* + * Locking scheme: + * - all ACL updates are protected by inode->i_mutex, which is taken before + * calling into this file. + */ + +STATIC struct posix_acl * +xfs_acl_from_disk(struct xfs_acl *aclp) +{ + struct posix_acl_entry *acl_e; + struct posix_acl *acl; + struct xfs_acl_entry *ace; + int count, i; + + count = be32_to_cpu(aclp->acl_cnt); + + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < count; i++) { + acl_e = &acl->a_entries[i]; + ace = &aclp->acl_entry[i]; + + /* + * The tag is 32 bits on disk and 16 bits in core. + * + * Because every access to it goes through the core + * format first this is not a problem. + */ + acl_e->e_tag = be32_to_cpu(ace->ae_tag); + acl_e->e_perm = be16_to_cpu(ace->ae_perm); + + switch (acl_e->e_tag) { + case ACL_USER: + case ACL_GROUP: + acl_e->e_id = be32_to_cpu(ace->ae_id); + break; + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + acl_e->e_id = ACL_UNDEFINED_ID; + break; + default: + goto fail; + } + } + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +STATIC void +xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl) +{ + const struct posix_acl_entry *acl_e; + struct xfs_acl_entry *ace; + int i; + + aclp->acl_cnt = cpu_to_be32(acl->a_count); + for (i = 0; i < acl->a_count; i++) { + ace = &aclp->acl_entry[i]; + acl_e = &acl->a_entries[i]; + + ace->ae_tag = cpu_to_be32(acl_e->e_tag); + ace->ae_id = cpu_to_be32(acl_e->e_id); + ace->ae_perm = cpu_to_be16(acl_e->e_perm); + } +} + +struct posix_acl * +xfs_get_acl(struct inode *inode, int type) +{ + struct xfs_inode *ip = XFS_I(inode); + struct posix_acl *acl; + struct xfs_acl *xfs_acl; + int len = sizeof(struct xfs_acl); + unsigned char *ea_name; + int error; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + trace_xfs_get_acl(ip); + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + break; + case ACL_TYPE_DEFAULT: + ea_name = SGI_ACL_DEFAULT; + break; + default: + BUG(); + } + + /* + * If we have a cached ACLs value just return it, not need to + * go out to the disk. + */ + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return ERR_PTR(-ENOMEM); + + error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl, + &len, ATTR_ROOT); + if (error) { + /* + * If the attribute doesn't exist make sure we have a negative + * cache entry, for any other error assume it is transient and + * leave the cache entry as ACL_NOT_CACHED. + */ + if (error == -ENOATTR) { + acl = NULL; + goto out_update_cache; + } + goto out; + } + + acl = xfs_acl_from_disk(xfs_acl); + if (IS_ERR(acl)) + goto out; + + out_update_cache: + set_cached_acl(inode, type, acl); + out: + kfree(xfs_acl); + return acl; +} + +STATIC int +xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct xfs_inode *ip = XFS_I(inode); + unsigned char *ea_name; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + ea_name = SGI_ACL_FILE; + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + ea_name = SGI_ACL_DEFAULT; + break; + default: + return -EINVAL; + } + + if (acl) { + struct xfs_acl *xfs_acl; + int len; + + xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL); + if (!xfs_acl) + return -ENOMEM; + + xfs_acl_to_disk(xfs_acl, acl); + len = sizeof(struct xfs_acl) - + (sizeof(struct xfs_acl_entry) * + (XFS_ACL_MAX_ENTRIES - acl->a_count)); + + error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl, + len, ATTR_ROOT); + + kfree(xfs_acl); + } else { + /* + * A NULL ACL argument means we want to remove the ACL. + */ + error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT); + + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; + } + + if (!error) + set_cached_acl(inode, type, acl); + return error; +} + +static int +xfs_set_mode(struct inode *inode, umode_t mode) +{ + int error = 0; + + if (mode != inode->i_mode) { + struct iattr iattr; + + iattr.ia_valid = ATTR_MODE | ATTR_CTIME; + iattr.ia_mode = mode; + iattr.ia_ctime = current_fs_time(inode->i_sb); + + error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); + } + + return error; +} + +static int +xfs_acl_exists(struct inode *inode, unsigned char *name) +{ + int len = sizeof(struct xfs_acl); + + return (xfs_attr_get(XFS_I(inode), name, NULL, &len, + ATTR_ROOT|ATTR_KERNOVAL) == 0); +} + +int +posix_acl_access_exists(struct inode *inode) +{ + return xfs_acl_exists(inode, SGI_ACL_FILE); +} + +int +posix_acl_default_exists(struct inode *inode) +{ + if (!S_ISDIR(inode->i_mode)) + return 0; + return xfs_acl_exists(inode, SGI_ACL_DEFAULT); +} + +/* + * No need for i_mutex because the inode is not yet exposed to the VFS. + */ +int +xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) +{ + umode_t mode = inode->i_mode; + int error = 0, inherit = 0; + + if (S_ISDIR(inode->i_mode)) { + error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); + if (error) + goto out; + } + + error = posix_acl_create(&acl, GFP_KERNEL, &mode); + if (error < 0) + return error; + + /* + * If posix_acl_create returns a positive value we need to + * inherit a permission that can't be represented using the Unix + * mode bits and we actually need to set an ACL. + */ + if (error > 0) + inherit = 1; + + error = xfs_set_mode(inode, mode); + if (error) + goto out; + + if (inherit) + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); + +out: + posix_acl_release(acl); + return error; +} + +int +xfs_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; + + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); + return error; +} + +static int +xfs_xattr_acl_get(struct dentry *dentry, const char *name, + void *value, size_t size, int type) +{ + struct posix_acl *acl; + int error; + + acl = xfs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + error = posix_acl_to_xattr(acl, value, size); + posix_acl_release(acl); + + return error; +} + +static int +xfs_xattr_acl_set(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int type) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl = NULL; + int error = 0; + + if (flags & XATTR_CREATE) + return -EINVAL; + if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) + return value ? -EACCES : 0; + if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (!value) + goto set_acl; + + acl = posix_acl_from_xattr(value, size); + if (!acl) { + /* + * acl_set_file(3) may request that we set default ACLs with + * zero length -- defend (gracefully) against that here. + */ + goto out; + } + if (IS_ERR(acl)) { + error = PTR_ERR(acl); + goto out; + } + + error = posix_acl_valid(acl); + if (error) + goto out_release; + + error = -EINVAL; + if (acl->a_count > XFS_ACL_MAX_ENTRIES) + goto out_release; + + if (type == ACL_TYPE_ACCESS) { + umode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + + if (error <= 0) { + posix_acl_release(acl); + acl = NULL; + + if (error < 0) + return error; + } + + error = xfs_set_mode(inode, mode); + if (error) + goto out_release; + } + + set_acl: + error = xfs_set_acl(inode, type, acl); + out_release: + posix_acl_release(acl); + out: + return error; +} + +const struct xattr_handler xfs_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; + +const struct xattr_handler xfs_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = xfs_xattr_acl_get, + .set = xfs_xattr_acl_set, +}; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c new file mode 100644 index 000000000000..63e971e2b837 --- /dev/null +++ b/fs/xfs/xfs_aops.c @@ -0,0 +1,1499 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_trans.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_rw.h" +#include "xfs_iomap.h" +#include "xfs_vnodeops.h" +#include "xfs_trace.h" +#include "xfs_bmap.h" +#include +#include +#include +#include + + +/* + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + +void +xfs_count_page_state( + struct page *page, + int *delalloc, + int *unwritten) +{ + struct buffer_head *bh, *head; + + *delalloc = *unwritten = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) + (*unwritten) = 1; + else if (buffer_delay(bh)) + (*delalloc) = 1; + } while ((bh = bh->b_this_page) != head); +} + +STATIC struct block_device * +xfs_find_bdev_for_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + if (XFS_IS_REALTIME_INODE(ip)) + return mp->m_rtdev_targp->bt_bdev; + else + return mp->m_ddev_targp->bt_bdev; +} + +/* + * We're now finished for good with this ioend structure. + * Update the page state via the associated buffer_heads, + * release holds on the inode and bio, and finally free + * up memory. Do not use the ioend after this. + */ +STATIC void +xfs_destroy_ioend( + xfs_ioend_t *ioend) +{ + struct buffer_head *bh, *next; + struct xfs_inode *ip = XFS_I(ioend->io_inode); + + for (bh = ioend->io_buffer_head; bh; bh = next) { + next = bh->b_private; + bh->b_end_io(bh, !ioend->io_error); + } + + /* + * Volume managers supporting multiple paths can send back ENODEV + * when the final path disappears. In this case continuing to fill + * the page cache with dirty data which cannot be written out is + * evil, so prevent that. + */ + if (unlikely(ioend->io_error == -ENODEV)) { + xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, + __FILE__, __LINE__); + } + + xfs_ioend_wake(ip); + mempool_free(ioend, xfs_ioend_pool); +} + +/* + * If the end of the current ioend is beyond the current EOF, + * return the new EOF value, otherwise zero. + */ +STATIC xfs_fsize_t +xfs_ioend_new_eof( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + xfs_fsize_t bsize; + + bsize = ioend->io_offset + ioend->io_size; + isize = MAX(ip->i_size, ip->i_new_size); + isize = MIN(isize, bsize); + return isize > ip->i_d.di_size ? isize : 0; +} + +/* + * Update on-disk file size now that data has been written to disk. The + * current in-memory file size is i_size. If a write is beyond eof i_new_size + * will be the intended file size until i_size is updated. If this write does + * not extend all the way to the valid file size then restrict this update to + * the end of the write. + * + * This function does not block as blocking on the inode lock in IO completion + * can lead to IO completion order dependency deadlocks.. If it can't get the + * inode ilock it will return EAGAIN. Callers must handle this. + */ +STATIC int +xfs_setfilesize( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + + if (unlikely(ioend->io_error)) + return 0; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return EAGAIN; + + isize = xfs_ioend_new_eof(ioend); + if (isize) { + trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); + ip->i_d.di_size = isize; + xfs_mark_inode_dirty(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + +/* + * Schedule IO completion handling on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) { + if (ioend->io_type == IO_UNWRITTEN) + queue_work(xfsconvertd_workqueue, &ioend->io_work); + else + queue_work(xfsdatad_workqueue, &ioend->io_work); + } +} + +/* + * IO write completion. + */ +STATIC void +xfs_end_io( + struct work_struct *work) +{ + xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); + struct xfs_inode *ip = XFS_I(ioend->io_inode); + int error = 0; + + /* + * For unwritten extents we need to issue transactions to convert a + * range to normal written extens after the data I/O has finished. + */ + if (ioend->io_type == IO_UNWRITTEN && + likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) { + + error = xfs_iomap_write_unwritten(ip, ioend->io_offset, + ioend->io_size); + if (error) + ioend->io_error = error; + } + + /* + * We might have to update the on-disk file size after extending + * writes. + */ + error = xfs_setfilesize(ioend); + ASSERT(!error || error == EAGAIN); + + /* + * If we didn't complete processing of the ioend, requeue it to the + * tail of the workqueue for another attempt later. Otherwise destroy + * it. + */ + if (error == EAGAIN) { + atomic_inc(&ioend->io_remaining); + xfs_finish_ioend(ioend); + /* ensure we don't spin on blocked ioends */ + delay(1); + } else { + if (ioend->io_iocb) + aio_complete(ioend->io_iocb, ioend->io_result, 0); + xfs_destroy_ioend(ioend); + } +} + +/* + * Call IO completion handling in caller context on the final put of an ioend. + */ +STATIC void +xfs_finish_ioend_sync( + struct xfs_ioend *ioend) +{ + if (atomic_dec_and_test(&ioend->io_remaining)) + xfs_end_io(&ioend->io_work); +} + +/* + * Allocate and initialise an IO completion structure. + * We need to track unwritten extent write completion here initially. + * We'll need to extend this for updating the ondisk inode size later + * (vs. incore size). + */ +STATIC xfs_ioend_t * +xfs_alloc_ioend( + struct inode *inode, + unsigned int type) +{ + xfs_ioend_t *ioend; + + ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); + + /* + * Set the count to 1 initially, which will prevent an I/O + * completion callback from happening before we have started + * all the I/O from calling the completion routine too early. + */ + atomic_set(&ioend->io_remaining, 1); + ioend->io_error = 0; + ioend->io_list = NULL; + ioend->io_type = type; + ioend->io_inode = inode; + ioend->io_buffer_head = NULL; + ioend->io_buffer_tail = NULL; + atomic_inc(&XFS_I(ioend->io_inode)->i_iocount); + ioend->io_offset = 0; + ioend->io_size = 0; + ioend->io_iocb = NULL; + ioend->io_result = 0; + + INIT_WORK(&ioend->io_work, xfs_end_io); + return ioend; +} + +STATIC int +xfs_map_blocks( + struct inode *inode, + loff_t offset, + struct xfs_bmbt_irec *imap, + int type, + int nonblocking) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t count = 1 << inode->i_blkbits; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int bmapi_flags = XFS_BMAPI_ENTIRE; + int nimaps = 1; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (type == IO_UNWRITTEN) + bmapi_flags |= XFS_BMAPI_IGSTATE; + + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { + if (nonblocking) + return -XFS_ERROR(EAGAIN); + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + + ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || + (ip->i_df.if_flags & XFS_IFEXTENTS)); + ASSERT(offset <= mp->m_maxioffset); + + if (offset + count > mp->m_maxioffset) + count = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, + bmapi_flags, NULL, 0, imap, &nimaps, NULL); + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (error) + return -XFS_ERROR(error); + + if (type == IO_DELALLOC && + (!nimaps || isnullstartblock(imap->br_startblock))) { + error = xfs_iomap_write_allocate(ip, offset, count, imap); + if (!error) + trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); + return -XFS_ERROR(error); + } + +#ifdef DEBUG + if (type == IO_UNWRITTEN) { + ASSERT(nimaps); + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + } +#endif + if (nimaps) + trace_xfs_map_blocks_found(ip, offset, count, type, imap); + return 0; +} + +STATIC int +xfs_imap_valid( + struct inode *inode, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + offset >>= inode->i_blkbits; + + return offset >= imap->br_startoff && + offset < imap->br_startoff + imap->br_blockcount; +} + +/* + * BIO completion handler for buffered IO. + */ +STATIC void +xfs_end_bio( + struct bio *bio, + int error) +{ + xfs_ioend_t *ioend = bio->bi_private; + + ASSERT(atomic_read(&bio->bi_cnt) >= 1); + ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error; + + /* Toss bio and pass work off to an xfsdatad thread */ + bio->bi_private = NULL; + bio->bi_end_io = NULL; + bio_put(bio); + + xfs_finish_ioend(ioend); +} + +STATIC void +xfs_submit_ioend_bio( + struct writeback_control *wbc, + xfs_ioend_t *ioend, + struct bio *bio) +{ + atomic_inc(&ioend->io_remaining); + bio->bi_private = ioend; + bio->bi_end_io = xfs_end_bio; + + /* + * If the I/O is beyond EOF we mark the inode dirty immediately + * but don't update the inode size until I/O completion. + */ + if (xfs_ioend_new_eof(ioend)) + xfs_mark_inode_dirty(XFS_I(ioend->io_inode)); + + submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); +} + +STATIC struct bio * +xfs_alloc_ioend_bio( + struct buffer_head *bh) +{ + int nvecs = bio_get_nr_vecs(bh->b_bdev); + struct bio *bio = bio_alloc(GFP_NOIO, nvecs); + + ASSERT(bio->bi_private == NULL); + bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); + bio->bi_bdev = bh->b_bdev; + return bio; +} + +STATIC void +xfs_start_buffer_writeback( + struct buffer_head *bh) +{ + ASSERT(buffer_mapped(bh)); + ASSERT(buffer_locked(bh)); + ASSERT(!buffer_delay(bh)); + ASSERT(!buffer_unwritten(bh)); + + mark_buffer_async_write(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); +} + +STATIC void +xfs_start_page_writeback( + struct page *page, + int clear_dirty, + int buffers) +{ + ASSERT(PageLocked(page)); + ASSERT(!PageWriteback(page)); + if (clear_dirty) + clear_page_dirty_for_io(page); + set_page_writeback(page); + unlock_page(page); + /* If no buffers on the page are to be written, finish it here */ + if (!buffers) + end_page_writeback(page); +} + +static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh) +{ + return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); +} + +/* + * Submit all of the bios for all of the ioends we have saved up, covering the + * initial writepage page and also any probed pages. + * + * Because we may have multiple ioends spanning a page, we need to start + * writeback on all the buffers before we submit them for I/O. If we mark the + * buffers as we got, then we can end up with a page that only has buffers + * marked async write and I/O complete on can occur before we mark the other + * buffers async write. + * + * The end result of this is that we trip a bug in end_page_writeback() because + * we call it twice for the one page as the code in end_buffer_async_write() + * assumes that all buffers on the page are started at the same time. + * + * The fix is two passes across the ioend list - one to start writeback on the + * buffer_heads, and then submit them for I/O on the second pass. + */ +STATIC void +xfs_submit_ioend( + struct writeback_control *wbc, + xfs_ioend_t *ioend) +{ + xfs_ioend_t *head = ioend; + xfs_ioend_t *next; + struct buffer_head *bh; + struct bio *bio; + sector_t lastblock = 0; + + /* Pass 1 - start writeback */ + do { + next = ioend->io_list; + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) + xfs_start_buffer_writeback(bh); + } while ((ioend = next) != NULL); + + /* Pass 2 - submit I/O */ + ioend = head; + do { + next = ioend->io_list; + bio = NULL; + + for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { + + if (!bio) { + retry: + bio = xfs_alloc_ioend_bio(bh); + } else if (bh->b_blocknr != lastblock + 1) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + if (bio_add_buffer(bio, bh) != bh->b_size) { + xfs_submit_ioend_bio(wbc, ioend, bio); + goto retry; + } + + lastblock = bh->b_blocknr; + } + if (bio) + xfs_submit_ioend_bio(wbc, ioend, bio); + xfs_finish_ioend(ioend); + } while ((ioend = next) != NULL); +} + +/* + * Cancel submission of all buffer_heads so far in this endio. + * Toss the endio too. Only ever called for the initial page + * in a writepage request, so only ever one page. + */ +STATIC void +xfs_cancel_ioend( + xfs_ioend_t *ioend) +{ + xfs_ioend_t *next; + struct buffer_head *bh, *next_bh; + + do { + next = ioend->io_list; + bh = ioend->io_buffer_head; + do { + next_bh = bh->b_private; + clear_buffer_async_write(bh); + unlock_buffer(bh); + } while ((bh = next_bh) != NULL); + + xfs_ioend_wake(XFS_I(ioend->io_inode)); + mempool_free(ioend, xfs_ioend_pool); + } while ((ioend = next) != NULL); +} + +/* + * Test to see if we've been building up a completion structure for + * earlier buffers -- if so, we try to append to this ioend if we + * can, otherwise we finish off any current ioend and start another. + * Return true if we've finished the given ioend. + */ +STATIC void +xfs_add_to_ioend( + struct inode *inode, + struct buffer_head *bh, + xfs_off_t offset, + unsigned int type, + xfs_ioend_t **result, + int need_ioend) +{ + xfs_ioend_t *ioend = *result; + + if (!ioend || need_ioend || type != ioend->io_type) { + xfs_ioend_t *previous = *result; + + ioend = xfs_alloc_ioend(inode, type); + ioend->io_offset = offset; + ioend->io_buffer_head = bh; + ioend->io_buffer_tail = bh; + if (previous) + previous->io_list = ioend; + *result = ioend; + } else { + ioend->io_buffer_tail->b_private = bh; + ioend->io_buffer_tail = bh; + } + + bh->b_private = NULL; + ioend->io_size += bh->b_size; +} + +STATIC void +xfs_map_buffer( + struct inode *inode, + struct buffer_head *bh, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + sector_t bn; + struct xfs_mount *m = XFS_I(inode)->i_mount; + xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); + xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); + + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + + bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + + ((offset - iomap_offset) >> inode->i_blkbits); + + ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); + + bh->b_blocknr = bn; + set_buffer_mapped(bh); +} + +STATIC void +xfs_map_at_offset( + struct inode *inode, + struct buffer_head *bh, + struct xfs_bmbt_irec *imap, + xfs_off_t offset) +{ + ASSERT(imap->br_startblock != HOLESTARTBLOCK); + ASSERT(imap->br_startblock != DELAYSTARTBLOCK); + + xfs_map_buffer(inode, bh, imap, offset); + set_buffer_mapped(bh); + clear_buffer_delay(bh); + clear_buffer_unwritten(bh); +} + +/* + * Test if a given page is suitable for writing as part of an unwritten + * or delayed allocate extent. + */ +STATIC int +xfs_is_delayed_page( + struct page *page, + unsigned int type) +{ + if (PageWriteback(page)) + return 0; + + if (page->mapping && page_has_buffers(page)) { + struct buffer_head *bh, *head; + int acceptable = 0; + + bh = head = page_buffers(page); + do { + if (buffer_unwritten(bh)) + acceptable = (type == IO_UNWRITTEN); + else if (buffer_delay(bh)) + acceptable = (type == IO_DELALLOC); + else if (buffer_dirty(bh) && buffer_mapped(bh)) + acceptable = (type == IO_OVERWRITE); + else + break; + } while ((bh = bh->b_this_page) != head); + + if (acceptable) + return 1; + } + + return 0; +} + +/* + * Allocate & map buffers for page given the extent map. Write it out. + * except for the original page of a writepage, this is called on + * delalloc/unwritten pages only, for the original page it is possible + * that the page has no mapping at all. + */ +STATIC int +xfs_convert_page( + struct inode *inode, + struct page *page, + loff_t tindex, + struct xfs_bmbt_irec *imap, + xfs_ioend_t **ioendp, + struct writeback_control *wbc) +{ + struct buffer_head *bh, *head; + xfs_off_t end_offset; + unsigned long p_offset; + unsigned int type; + int len, page_dirty; + int count = 0, done = 0, uptodate = 1; + xfs_off_t offset = page_offset(page); + + if (page->index != tindex) + goto fail; + if (!trylock_page(page)) + goto fail; + if (PageWriteback(page)) + goto fail_unlock_page; + if (page->mapping != inode->i_mapping) + goto fail_unlock_page; + if (!xfs_is_delayed_page(page, (*ioendp)->io_type)) + goto fail_unlock_page; + + /* + * page_dirty is initially a count of buffers on the page before + * EOF and is decremented as we move each into a cleanable state. + * + * Derivation: + * + * End offset is the highest offset that this page should represent. + * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) + * will evaluate non-zero and be less than PAGE_CACHE_SIZE and + * hence give us the correct page_dirty count. On any other page, + * it will be zero and in that case we need page_dirty to be the + * count of buffers on the page. + */ + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + i_size_read(inode)); + + len = 1 << inode->i_blkbits; + p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), + PAGE_CACHE_SIZE); + p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; + page_dirty = p_offset / len; + + bh = head = page_buffers(page); + do { + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + if (!(PageUptodate(page) || buffer_uptodate(bh))) { + done = 1; + continue; + } + + if (buffer_unwritten(bh) || buffer_delay(bh) || + buffer_mapped(bh)) { + if (buffer_unwritten(bh)) + type = IO_UNWRITTEN; + else if (buffer_delay(bh)) + type = IO_DELALLOC; + else + type = IO_OVERWRITE; + + if (!xfs_imap_valid(inode, imap, offset)) { + done = 1; + continue; + } + + lock_buffer(bh); + if (type != IO_OVERWRITE) + xfs_map_at_offset(inode, bh, imap, offset); + xfs_add_to_ioend(inode, bh, offset, type, + ioendp, done); + + page_dirty--; + count++; + } else { + done = 1; + } + } while (offset += len, (bh = bh->b_this_page) != head); + + if (uptodate && bh == head) + SetPageUptodate(page); + + if (count) { + if (--wbc->nr_to_write <= 0 && + wbc->sync_mode == WB_SYNC_NONE) + done = 1; + } + xfs_start_page_writeback(page, !page_dirty, count); + + return done; + fail_unlock_page: + unlock_page(page); + fail: + return 1; +} + +/* + * Convert & write out a cluster of pages in the same extent as defined + * by mp and following the start page. + */ +STATIC void +xfs_cluster_write( + struct inode *inode, + pgoff_t tindex, + struct xfs_bmbt_irec *imap, + xfs_ioend_t **ioendp, + struct writeback_control *wbc, + pgoff_t tlast) +{ + struct pagevec pvec; + int done = 0, i; + + pagevec_init(&pvec, 0); + while (!done && tindex <= tlast) { + unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); + + if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) + break; + + for (i = 0; i < pagevec_count(&pvec); i++) { + done = xfs_convert_page(inode, pvec.pages[i], tindex++, + imap, ioendp, wbc); + if (done) + break; + } + + pagevec_release(&pvec); + cond_resched(); + } +} + +STATIC void +xfs_vm_invalidatepage( + struct page *page, + unsigned long offset) +{ + trace_xfs_invalidatepage(page->mapping->host, page, offset); + block_invalidatepage(page, offset); +} + +/* + * If the page has delalloc buffers on it, we need to punch them out before we + * invalidate the page. If we don't, we leave a stale delalloc mapping on the + * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read + * is done on that same region - the delalloc extent is returned when none is + * supposed to be there. + * + * We prevent this by truncating away the delalloc regions on the page before + * invalidating it. Because they are delalloc, we can do this without needing a + * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this + * truncation without a transaction as there is no space left for block + * reservation (typically why we see a ENOSPC in writeback). + * + * This is not a performance critical path, so for now just do the punching a + * buffer head at a time. + */ +STATIC void +xfs_aops_discard_page( + struct page *page) +{ + struct inode *inode = page->mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct buffer_head *bh, *head; + loff_t offset = page_offset(page); + + if (!xfs_is_delayed_page(page, IO_DELALLOC)) + goto out_invalidate; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + goto out_invalidate; + + xfs_alert(ip->i_mount, + "page discard on page %p, inode 0x%llx, offset %llu.", + page, ip->i_ino, offset); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + bh = head = page_buffers(page); + do { + int error; + xfs_fileoff_t start_fsb; + + if (!buffer_delay(bh)) + goto next_buffer; + + start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); + if (error) { + /* something screwed, just bail */ + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_alert(ip->i_mount, + "page discard unable to remove delalloc mapping."); + } + break; + } +next_buffer: + offset += 1 << inode->i_blkbits; + + } while ((bh = bh->b_this_page) != head); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_invalidate: + xfs_vm_invalidatepage(page, 0); + return; +} + +/* + * Write out a dirty page. + * + * For delalloc space on the page we need to allocate space and flush it. + * For unwritten space on the page we need to start the conversion to + * regular allocated space. + * For any other dirty buffer heads on the page we should flush them. + */ +STATIC int +xfs_vm_writepage( + struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *bh, *head; + struct xfs_bmbt_irec imap; + xfs_ioend_t *ioend = NULL, *iohead = NULL; + loff_t offset; + unsigned int type; + __uint64_t end_offset; + pgoff_t end_index, last_index; + ssize_t len; + int err, imap_valid = 0, uptodate = 1; + int count = 0; + int nonblocking = 0; + + trace_xfs_writepage(inode, page, 0); + + ASSERT(page_has_buffers(page)); + + /* + * Refuse to write the page out if we are called from reclaim context. + * + * This avoids stack overflows when called from deeply used stacks in + * random callers for direct reclaim or memcg reclaim. We explicitly + * allow reclaim from kswapd as the stack usage there is relatively low. + * + * This should really be done by the core VM, but until that happens + * filesystems like XFS, btrfs and ext4 have to take care of this + * by themselves. + */ + if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC) + goto redirty; + + /* + * Given that we do not allow direct reclaim to call us, we should + * never be called while in a filesystem transaction. + */ + if (WARN_ON(current->flags & PF_FSTRANS)) + goto redirty; + + /* Is this page beyond the end of the file? */ + offset = i_size_read(inode); + end_index = offset >> PAGE_CACHE_SHIFT; + last_index = (offset - 1) >> PAGE_CACHE_SHIFT; + if (page->index >= end_index) { + if ((page->index >= end_index + 1) || + !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) { + unlock_page(page); + return 0; + } + } + + end_offset = min_t(unsigned long long, + (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, + offset); + len = 1 << inode->i_blkbits; + + bh = head = page_buffers(page); + offset = page_offset(page); + type = IO_OVERWRITE; + + if (wbc->sync_mode == WB_SYNC_NONE) + nonblocking = 1; + + do { + int new_ioend = 0; + + if (offset >= end_offset) + break; + if (!buffer_uptodate(bh)) + uptodate = 0; + + /* + * set_page_dirty dirties all buffers in a page, independent + * of their state. The dirty state however is entirely + * meaningless for holes (!mapped && uptodate), so skip + * buffers covering holes here. + */ + if (!buffer_mapped(bh) && buffer_uptodate(bh)) { + imap_valid = 0; + continue; + } + + if (buffer_unwritten(bh)) { + if (type != IO_UNWRITTEN) { + type = IO_UNWRITTEN; + imap_valid = 0; + } + } else if (buffer_delay(bh)) { + if (type != IO_DELALLOC) { + type = IO_DELALLOC; + imap_valid = 0; + } + } else if (buffer_uptodate(bh)) { + if (type != IO_OVERWRITE) { + type = IO_OVERWRITE; + imap_valid = 0; + } + } else { + if (PageUptodate(page)) { + ASSERT(buffer_mapped(bh)); + imap_valid = 0; + } + continue; + } + + if (imap_valid) + imap_valid = xfs_imap_valid(inode, &imap, offset); + if (!imap_valid) { + /* + * If we didn't have a valid mapping then we need to + * put the new mapping into a separate ioend structure. + * This ensures non-contiguous extents always have + * separate ioends, which is particularly important + * for unwritten extent conversion at I/O completion + * time. + */ + new_ioend = 1; + err = xfs_map_blocks(inode, offset, &imap, type, + nonblocking); + if (err) + goto error; + imap_valid = xfs_imap_valid(inode, &imap, offset); + } + if (imap_valid) { + lock_buffer(bh); + if (type != IO_OVERWRITE) + xfs_map_at_offset(inode, bh, &imap, offset); + xfs_add_to_ioend(inode, bh, offset, type, &ioend, + new_ioend); + count++; + } + + if (!iohead) + iohead = ioend; + + } while (offset += len, ((bh = bh->b_this_page) != head)); + + if (uptodate && bh == head) + SetPageUptodate(page); + + xfs_start_page_writeback(page, 1, count); + + if (ioend && imap_valid) { + xfs_off_t end_index; + + end_index = imap.br_startoff + imap.br_blockcount; + + /* to bytes */ + end_index <<= inode->i_blkbits; + + /* to pages */ + end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; + + /* check against file size */ + if (end_index > last_index) + end_index = last_index; + + xfs_cluster_write(inode, page->index + 1, &imap, &ioend, + wbc, end_index); + } + + if (iohead) + xfs_submit_ioend(wbc, iohead); + + return 0; + +error: + if (iohead) + xfs_cancel_ioend(iohead); + + if (err == -EAGAIN) + goto redirty; + + xfs_aops_discard_page(page); + ClearPageUptodate(page); + unlock_page(page); + return err; + +redirty: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; +} + +STATIC int +xfs_vm_writepages( + struct address_space *mapping, + struct writeback_control *wbc) +{ + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); + return generic_writepages(mapping, wbc); +} + +/* + * Called to move a page into cleanable state - and from there + * to be released. The page should already be clean. We always + * have buffer heads in this call. + * + * Returns 1 if the page is ok to release, 0 otherwise. + */ +STATIC int +xfs_vm_releasepage( + struct page *page, + gfp_t gfp_mask) +{ + int delalloc, unwritten; + + trace_xfs_releasepage(page->mapping->host, page, 0); + + xfs_count_page_state(page, &delalloc, &unwritten); + + if (WARN_ON(delalloc)) + return 0; + if (WARN_ON(unwritten)) + return 0; + + return try_to_free_buffers(page); +} + +STATIC int +__xfs_get_blocks( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create, + int direct) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb, end_fsb; + int error = 0; + int lockmode = 0; + struct xfs_bmbt_irec imap; + int nimaps = 1; + xfs_off_t offset; + ssize_t size; + int new = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + offset = (xfs_off_t)iblock << inode->i_blkbits; + ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); + size = bh_result->b_size; + + if (!create && direct && offset >= i_size_read(inode)) + return 0; + + if (create) { + lockmode = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockmode); + } else { + lockmode = xfs_ilock_map_shared(ip); + } + + ASSERT(offset <= mp->m_maxioffset); + if (offset + size > mp->m_maxioffset) + size = mp->m_maxioffset - offset; + end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); + offset_fsb = XFS_B_TO_FSBT(mp, offset); + + error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb, + XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL); + if (error) + goto out_unlock; + + if (create && + (!nimaps || + (imap.br_startblock == HOLESTARTBLOCK || + imap.br_startblock == DELAYSTARTBLOCK))) { + if (direct) { + error = xfs_iomap_write_direct(ip, offset, size, + &imap, nimaps); + } else { + error = xfs_iomap_write_delay(ip, offset, size, &imap); + } + if (error) + goto out_unlock; + + trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap); + } else if (nimaps) { + trace_xfs_get_blocks_found(ip, offset, size, 0, &imap); + } else { + trace_xfs_get_blocks_notfound(ip, offset, size); + goto out_unlock; + } + xfs_iunlock(ip, lockmode); + + if (imap.br_startblock != HOLESTARTBLOCK && + imap.br_startblock != DELAYSTARTBLOCK) { + /* + * For unwritten extents do not report a disk address on + * the read case (treat as if we're reading into a hole). + */ + if (create || !ISUNWRITTEN(&imap)) + xfs_map_buffer(inode, bh_result, &imap, offset); + if (create && ISUNWRITTEN(&imap)) { + if (direct) + bh_result->b_private = inode; + set_buffer_unwritten(bh_result); + } + } + + /* + * If this is a realtime file, data may be on a different device. + * to that pointed to from the buffer_head b_bdev currently. + */ + bh_result->b_bdev = xfs_find_bdev_for_inode(inode); + + /* + * If we previously allocated a block out beyond eof and we are now + * coming back to use it then we will need to flag it as new even if it + * has a disk address. + * + * With sub-block writes into unwritten extents we also need to mark + * the buffer as new so that the unwritten parts of the buffer gets + * correctly zeroed. + */ + if (create && + ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || + (offset >= i_size_read(inode)) || + (new || ISUNWRITTEN(&imap)))) + set_buffer_new(bh_result); + + if (imap.br_startblock == DELAYSTARTBLOCK) { + BUG_ON(direct); + if (create) { + set_buffer_uptodate(bh_result); + set_buffer_mapped(bh_result); + set_buffer_delay(bh_result); + } + } + + /* + * If this is O_DIRECT or the mpage code calling tell them how large + * the mapping is, so that we can avoid repeated get_blocks calls. + */ + if (direct || size > (1 << inode->i_blkbits)) { + xfs_off_t mapping_size; + + mapping_size = imap.br_startoff + imap.br_blockcount - iblock; + mapping_size <<= inode->i_blkbits; + + ASSERT(mapping_size > 0); + if (mapping_size > size) + mapping_size = size; + if (mapping_size > LONG_MAX) + mapping_size = LONG_MAX; + + bh_result->b_size = mapping_size; + } + + return 0; + +out_unlock: + xfs_iunlock(ip, lockmode); + return -error; +} + +int +xfs_get_blocks( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, 0); +} + +STATIC int +xfs_get_blocks_direct( + struct inode *inode, + sector_t iblock, + struct buffer_head *bh_result, + int create) +{ + return __xfs_get_blocks(inode, iblock, bh_result, create, 1); +} + +/* + * Complete a direct I/O write request. + * + * If the private argument is non-NULL __xfs_get_blocks signals us that we + * need to issue a transaction to convert the range from unwritten to written + * extents. In case this is regular synchronous I/O we just call xfs_end_io + * to do this and we are done. But in case this was a successful AIO + * request this handler is called from interrupt context, from which we + * can't start transactions. In that case offload the I/O completion to + * the workqueues we also use for buffered I/O completion. + */ +STATIC void +xfs_end_io_direct_write( + struct kiocb *iocb, + loff_t offset, + ssize_t size, + void *private, + int ret, + bool is_async) +{ + struct xfs_ioend *ioend = iocb->private; + + /* + * blockdev_direct_IO can return an error even after the I/O + * completion handler was called. Thus we need to protect + * against double-freeing. + */ + iocb->private = NULL; + + ioend->io_offset = offset; + ioend->io_size = size; + if (private && size > 0) + ioend->io_type = IO_UNWRITTEN; + + if (is_async) { + /* + * If we are converting an unwritten extent we need to delay + * the AIO completion until after the unwrittent extent + * conversion has completed, otherwise do it ASAP. + */ + if (ioend->io_type == IO_UNWRITTEN) { + ioend->io_iocb = iocb; + ioend->io_result = ret; + } else { + aio_complete(iocb, ret, 0); + } + xfs_finish_ioend(ioend); + } else { + xfs_finish_ioend_sync(ioend); + } + + /* XXX: probably should move into the real I/O completion handler */ + inode_dio_done(ioend->io_inode); +} + +STATIC ssize_t +xfs_vm_direct_IO( + int rw, + struct kiocb *iocb, + const struct iovec *iov, + loff_t offset, + unsigned long nr_segs) +{ + struct inode *inode = iocb->ki_filp->f_mapping->host; + struct block_device *bdev = xfs_find_bdev_for_inode(inode); + ssize_t ret; + + if (rw & WRITE) { + iocb->private = xfs_alloc_ioend(inode, IO_DIRECT); + + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct_write, NULL, 0); + if (ret != -EIOCBQUEUED && iocb->private) + xfs_destroy_ioend(iocb->private); + } else { + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, + offset, nr_segs, + xfs_get_blocks_direct, + NULL, NULL, 0); + } + + return ret; +} + +STATIC void +xfs_vm_write_failed( + struct address_space *mapping, + loff_t to) +{ + struct inode *inode = mapping->host; + + if (to > inode->i_size) { + /* + * punch out the delalloc blocks we have already allocated. We + * don't call xfs_setattr() to do this as we may be in the + * middle of a multi-iovec write and so the vfs inode->i_size + * will not match the xfs ip->i_size and so it will zero too + * much. Hence we jus truncate the page cache to zero what is + * necessary and punch the delalloc blocks directly. + */ + struct xfs_inode *ip = XFS_I(inode); + xfs_fileoff_t start_fsb; + xfs_fileoff_t end_fsb; + int error; + + truncate_pagecache(inode, to, inode->i_size); + + /* + * Check if there are any blocks that are outside of i_size + * that need to be trimmed back. + */ + start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; + end_fsb = XFS_B_TO_FSB(ip->i_mount, to); + if (end_fsb <= start_fsb) + return; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_bmap_punch_delalloc_range(ip, start_fsb, + end_fsb - start_fsb); + if (error) { + /* something screwed, just bail */ + if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_alert(ip->i_mount, + "xfs_vm_write_failed: unable to clean up ino %lld", + ip->i_ino); + } + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +STATIC int +xfs_vm_write_begin( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned flags, + struct page **pagep, + void **fsdata) +{ + int ret; + + ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, xfs_get_blocks); + if (unlikely(ret)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC int +xfs_vm_write_end( + struct file *file, + struct address_space *mapping, + loff_t pos, + unsigned len, + unsigned copied, + struct page *page, + void *fsdata) +{ + int ret; + + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); + if (unlikely(ret < len)) + xfs_vm_write_failed(mapping, pos + len); + return ret; +} + +STATIC sector_t +xfs_vm_bmap( + struct address_space *mapping, + sector_t block) +{ + struct inode *inode = (struct inode *)mapping->host; + struct xfs_inode *ip = XFS_I(inode); + + trace_xfs_vm_bmap(XFS_I(inode)); + xfs_ilock(ip, XFS_IOLOCK_SHARED); + xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return generic_block_bmap(mapping, block, xfs_get_blocks); +} + +STATIC int +xfs_vm_readpage( + struct file *unused, + struct page *page) +{ + return mpage_readpage(page, xfs_get_blocks); +} + +STATIC int +xfs_vm_readpages( + struct file *unused, + struct address_space *mapping, + struct list_head *pages, + unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); +} + +const struct address_space_operations xfs_address_space_operations = { + .readpage = xfs_vm_readpage, + .readpages = xfs_vm_readpages, + .writepage = xfs_vm_writepage, + .writepages = xfs_vm_writepages, + .releasepage = xfs_vm_releasepage, + .invalidatepage = xfs_vm_invalidatepage, + .write_begin = xfs_vm_write_begin, + .write_end = xfs_vm_write_end, + .bmap = xfs_vm_bmap, + .direct_IO = xfs_vm_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, + .error_remove_page = generic_error_remove_page, +}; diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h new file mode 100644 index 000000000000..71f721e1a71f --- /dev/null +++ b/fs/xfs/xfs_aops.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2005-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_AOPS_H__ +#define __XFS_AOPS_H__ + +extern struct workqueue_struct *xfsdatad_workqueue; +extern struct workqueue_struct *xfsconvertd_workqueue; +extern mempool_t *xfs_ioend_pool; + +/* + * Types of I/O for bmap clustering and I/O completion tracking. + */ +enum { + IO_DIRECT = 0, /* special case for direct I/O ioends */ + IO_DELALLOC, /* mapping covers delalloc region */ + IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */ + IO_OVERWRITE, /* mapping covers already allocated extent */ +}; + +#define XFS_IO_TYPES \ + { 0, "" }, \ + { IO_DELALLOC, "delalloc" }, \ + { IO_UNWRITTEN, "unwritten" }, \ + { IO_OVERWRITE, "overwrite" } + +/* + * xfs_ioend struct manages large extent writes for XFS. + * It can manage several multi-page bio's at once. + */ +typedef struct xfs_ioend { + struct xfs_ioend *io_list; /* next ioend in chain */ + unsigned int io_type; /* delalloc / unwritten */ + int io_error; /* I/O error code */ + atomic_t io_remaining; /* hold count */ + struct inode *io_inode; /* file being written to */ + struct buffer_head *io_buffer_head;/* buffer linked list head */ + struct buffer_head *io_buffer_tail;/* buffer linked list tail */ + size_t io_size; /* size of the extent */ + xfs_off_t io_offset; /* offset in the file */ + struct work_struct io_work; /* xfsdatad work queue */ + struct kiocb *io_iocb; + int io_result; +} xfs_ioend_t; + +extern const struct address_space_operations xfs_address_space_operations; +extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); + +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + +extern void xfs_count_page_state(struct page *, int *, int *); + +#endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c new file mode 100644 index 000000000000..c57836dc778f --- /dev/null +++ b/fs/xfs/xfs_buf.c @@ -0,0 +1,1876 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_trace.h" + +static kmem_zone_t *xfs_buf_zone; +STATIC int xfsbufd(void *); +STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int); + +static struct workqueue_struct *xfslogd_workqueue; +struct workqueue_struct *xfsdatad_workqueue; +struct workqueue_struct *xfsconvertd_workqueue; + +#ifdef XFS_BUF_LOCK_TRACKING +# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid) +# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1) +# define XB_GET_OWNER(bp) ((bp)->b_last_holder) +#else +# define XB_SET_OWNER(bp) do { } while (0) +# define XB_CLEAR_OWNER(bp) do { } while (0) +# define XB_GET_OWNER(bp) do { } while (0) +#endif + +#define xb_to_gfp(flags) \ + ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \ + ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN) + +#define xb_to_km(flags) \ + (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP) + +#define xfs_buf_allocate(flags) \ + kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags)) +#define xfs_buf_deallocate(bp) \ + kmem_zone_free(xfs_buf_zone, (bp)); + +static inline int +xfs_buf_is_vmapped( + struct xfs_buf *bp) +{ + /* + * Return true if the buffer is vmapped. + * + * The XBF_MAPPED flag is set if the buffer should be mapped, but the + * code is clever enough to know it doesn't have to map a single page, + * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1. + */ + return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1; +} + +static inline int +xfs_buf_vmap_len( + struct xfs_buf *bp) +{ + return (bp->b_page_count * PAGE_SIZE) - bp->b_offset; +} + +/* + * xfs_buf_lru_add - add a buffer to the LRU. + * + * The LRU takes a new reference to the buffer so that it will only be freed + * once the shrinker takes the buffer off the LRU. + */ +STATIC void +xfs_buf_lru_add( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + + spin_lock(&btp->bt_lru_lock); + if (list_empty(&bp->b_lru)) { + atomic_inc(&bp->b_hold); + list_add_tail(&bp->b_lru, &btp->bt_lru); + btp->bt_lru_nr++; + } + spin_unlock(&btp->bt_lru_lock); +} + +/* + * xfs_buf_lru_del - remove a buffer from the LRU + * + * The unlocked check is safe here because it only occurs when there are not + * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there + * to optimise the shrinker removing the buffer from the LRU and calling + * xfs_buf_free(). i.e. it removes an unnecessary round trip on the + * bt_lru_lock. + */ +STATIC void +xfs_buf_lru_del( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + + if (list_empty(&bp->b_lru)) + return; + + spin_lock(&btp->bt_lru_lock); + if (!list_empty(&bp->b_lru)) { + list_del_init(&bp->b_lru); + btp->bt_lru_nr--; + } + spin_unlock(&btp->bt_lru_lock); +} + +/* + * When we mark a buffer stale, we remove the buffer from the LRU and clear the + * b_lru_ref count so that the buffer is freed immediately when the buffer + * reference count falls to zero. If the buffer is already on the LRU, we need + * to remove the reference that LRU holds on the buffer. + * + * This prevents build-up of stale buffers on the LRU. + */ +void +xfs_buf_stale( + struct xfs_buf *bp) +{ + bp->b_flags |= XBF_STALE; + atomic_set(&(bp)->b_lru_ref, 0); + if (!list_empty(&bp->b_lru)) { + struct xfs_buftarg *btp = bp->b_target; + + spin_lock(&btp->bt_lru_lock); + if (!list_empty(&bp->b_lru)) { + list_del_init(&bp->b_lru); + btp->bt_lru_nr--; + atomic_dec(&bp->b_hold); + } + spin_unlock(&btp->bt_lru_lock); + } + ASSERT(atomic_read(&bp->b_hold) >= 1); +} + +STATIC void +_xfs_buf_initialize( + xfs_buf_t *bp, + xfs_buftarg_t *target, + xfs_off_t range_base, + size_t range_length, + xfs_buf_flags_t flags) +{ + /* + * We don't want certain flags to appear in b_flags. + */ + flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD); + + memset(bp, 0, sizeof(xfs_buf_t)); + atomic_set(&bp->b_hold, 1); + atomic_set(&bp->b_lru_ref, 1); + init_completion(&bp->b_iowait); + INIT_LIST_HEAD(&bp->b_lru); + INIT_LIST_HEAD(&bp->b_list); + RB_CLEAR_NODE(&bp->b_rbnode); + sema_init(&bp->b_sema, 0); /* held, no waiters */ + XB_SET_OWNER(bp); + bp->b_target = target; + bp->b_file_offset = range_base; + /* + * Set buffer_length and count_desired to the same value initially. + * I/O routines should use count_desired, which will be the same in + * most cases but may be reset (e.g. XFS recovery). + */ + bp->b_buffer_length = bp->b_count_desired = range_length; + bp->b_flags = flags; + bp->b_bn = XFS_BUF_DADDR_NULL; + atomic_set(&bp->b_pin_count, 0); + init_waitqueue_head(&bp->b_waiters); + + XFS_STATS_INC(xb_create); + + trace_xfs_buf_init(bp, _RET_IP_); +} + +/* + * Allocate a page array capable of holding a specified number + * of pages, and point the page buf at it. + */ +STATIC int +_xfs_buf_get_pages( + xfs_buf_t *bp, + int page_count, + xfs_buf_flags_t flags) +{ + /* Make sure that we have a page list */ + if (bp->b_pages == NULL) { + bp->b_offset = xfs_buf_poff(bp->b_file_offset); + bp->b_page_count = page_count; + if (page_count <= XB_PAGES) { + bp->b_pages = bp->b_page_array; + } else { + bp->b_pages = kmem_alloc(sizeof(struct page *) * + page_count, xb_to_km(flags)); + if (bp->b_pages == NULL) + return -ENOMEM; + } + memset(bp->b_pages, 0, sizeof(struct page *) * page_count); + } + return 0; +} + +/* + * Frees b_pages if it was allocated. + */ +STATIC void +_xfs_buf_free_pages( + xfs_buf_t *bp) +{ + if (bp->b_pages != bp->b_page_array) { + kmem_free(bp->b_pages); + bp->b_pages = NULL; + } +} + +/* + * Releases the specified buffer. + * + * The modification state of any associated pages is left unchanged. + * The buffer most not be on any hash - use xfs_buf_rele instead for + * hashed and refcounted buffers + */ +void +xfs_buf_free( + xfs_buf_t *bp) +{ + trace_xfs_buf_free(bp, _RET_IP_); + + ASSERT(list_empty(&bp->b_lru)); + + if (bp->b_flags & _XBF_PAGES) { + uint i; + + if (xfs_buf_is_vmapped(bp)) + vm_unmap_ram(bp->b_addr - bp->b_offset, + bp->b_page_count); + + for (i = 0; i < bp->b_page_count; i++) { + struct page *page = bp->b_pages[i]; + + __free_page(page); + } + } else if (bp->b_flags & _XBF_KMEM) + kmem_free(bp->b_addr); + _xfs_buf_free_pages(bp); + xfs_buf_deallocate(bp); +} + +/* + * Allocates all the pages for buffer in question and builds it's page list. + */ +STATIC int +xfs_buf_allocate_memory( + xfs_buf_t *bp, + uint flags) +{ + size_t size = bp->b_count_desired; + size_t nbytes, offset; + gfp_t gfp_mask = xb_to_gfp(flags); + unsigned short page_count, i; + xfs_off_t end; + int error; + + /* + * for buffers that are contained within a single page, just allocate + * the memory from the heap - there's no need for the complexity of + * page arrays to keep allocation down to order 0. + */ + if (bp->b_buffer_length < PAGE_SIZE) { + bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags)); + if (!bp->b_addr) { + /* low memory - use alloc_page loop instead */ + goto use_alloc_page; + } + + if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) & + PAGE_MASK) != + ((unsigned long)bp->b_addr & PAGE_MASK)) { + /* b_addr spans two pages - use alloc_page instead */ + kmem_free(bp->b_addr); + bp->b_addr = NULL; + goto use_alloc_page; + } + bp->b_offset = offset_in_page(bp->b_addr); + bp->b_pages = bp->b_page_array; + bp->b_pages[0] = virt_to_page(bp->b_addr); + bp->b_page_count = 1; + bp->b_flags |= XBF_MAPPED | _XBF_KMEM; + return 0; + } + +use_alloc_page: + end = bp->b_file_offset + bp->b_buffer_length; + page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset); + error = _xfs_buf_get_pages(bp, page_count, flags); + if (unlikely(error)) + return error; + + offset = bp->b_offset; + bp->b_flags |= _XBF_PAGES; + + for (i = 0; i < bp->b_page_count; i++) { + struct page *page; + uint retries = 0; +retry: + page = alloc_page(gfp_mask); + if (unlikely(page == NULL)) { + if (flags & XBF_READ_AHEAD) { + bp->b_page_count = i; + error = ENOMEM; + goto out_free_pages; + } + + /* + * This could deadlock. + * + * But until all the XFS lowlevel code is revamped to + * handle buffer allocation failures we can't do much. + */ + if (!(++retries % 100)) + xfs_err(NULL, + "possible memory allocation deadlock in %s (mode:0x%x)", + __func__, gfp_mask); + + XFS_STATS_INC(xb_page_retries); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + + XFS_STATS_INC(xb_page_found); + + nbytes = min_t(size_t, size, PAGE_SIZE - offset); + size -= nbytes; + bp->b_pages[i] = page; + offset = 0; + } + return 0; + +out_free_pages: + for (i = 0; i < bp->b_page_count; i++) + __free_page(bp->b_pages[i]); + return error; +} + +/* + * Map buffer into kernel address-space if necessary. + */ +STATIC int +_xfs_buf_map_pages( + xfs_buf_t *bp, + uint flags) +{ + ASSERT(bp->b_flags & _XBF_PAGES); + if (bp->b_page_count == 1) { + /* A single page buffer is always mappable */ + bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; + bp->b_flags |= XBF_MAPPED; + } else if (flags & XBF_MAPPED) { + int retried = 0; + + do { + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); + if (bp->b_addr) + break; + vm_unmap_aliases(); + } while (retried++ <= 1); + + if (!bp->b_addr) + return -ENOMEM; + bp->b_addr += bp->b_offset; + bp->b_flags |= XBF_MAPPED; + } + + return 0; +} + +/* + * Finding and Reading Buffers + */ + +/* + * Look up, and creates if absent, a lockable buffer for + * a given range of an inode. The buffer is returned + * locked. If other overlapping buffers exist, they are + * released before the new buffer is created and locked, + * which may imply that this call will block until those buffers + * are unlocked. No I/O is implied by this call. + */ +xfs_buf_t * +_xfs_buf_find( + xfs_buftarg_t *btp, /* block device target */ + xfs_off_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + xfs_buf_flags_t flags, + xfs_buf_t *new_bp) +{ + xfs_off_t range_base; + size_t range_length; + struct xfs_perag *pag; + struct rb_node **rbp; + struct rb_node *parent; + xfs_buf_t *bp; + + range_base = (ioff << BBSHIFT); + range_length = (isize << BBSHIFT); + + /* Check for IOs smaller than the sector size / not sector aligned */ + ASSERT(!(range_length < (1 << btp->bt_sshift))); + ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); + + /* get tree root */ + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, ioff)); + + /* walk tree */ + spin_lock(&pag->pag_buf_lock); + rbp = &pag->pag_buf_tree.rb_node; + parent = NULL; + bp = NULL; + while (*rbp) { + parent = *rbp; + bp = rb_entry(parent, struct xfs_buf, b_rbnode); + + if (range_base < bp->b_file_offset) + rbp = &(*rbp)->rb_left; + else if (range_base > bp->b_file_offset) + rbp = &(*rbp)->rb_right; + else { + /* + * found a block offset match. If the range doesn't + * match, the only way this is allowed is if the buffer + * in the cache is stale and the transaction that made + * it stale has not yet committed. i.e. we are + * reallocating a busy extent. Skip this buffer and + * continue searching to the right for an exact match. + */ + if (bp->b_buffer_length != range_length) { + ASSERT(bp->b_flags & XBF_STALE); + rbp = &(*rbp)->rb_right; + continue; + } + atomic_inc(&bp->b_hold); + goto found; + } + } + + /* No match found */ + if (new_bp) { + _xfs_buf_initialize(new_bp, btp, range_base, + range_length, flags); + rb_link_node(&new_bp->b_rbnode, parent, rbp); + rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + spin_unlock(&pag->pag_buf_lock); + } else { + XFS_STATS_INC(xb_miss_locked); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + } + return new_bp; + +found: + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) { + xfs_buf_rele(bp); + XFS_STATS_INC(xb_busy_locked); + return NULL; + } + xfs_buf_lock(bp); + XFS_STATS_INC(xb_get_locked_waited); + } + + /* + * if the buffer is stale, clear all the external state associated with + * it. We need to keep flags such as how we allocated the buffer memory + * intact here. + */ + if (bp->b_flags & XBF_STALE) { + ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); + bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES; + } + + trace_xfs_buf_find(bp, flags, _RET_IP_); + XFS_STATS_INC(xb_get_locked); + return bp; +} + +/* + * Assembles a buffer covering the specified range. + * Storage in memory for all portions of the buffer will be allocated, + * although backing storage may not be. + */ +xfs_buf_t * +xfs_buf_get( + xfs_buftarg_t *target,/* target for buffer */ + xfs_off_t ioff, /* starting offset of range */ + size_t isize, /* length of range */ + xfs_buf_flags_t flags) +{ + xfs_buf_t *bp, *new_bp; + int error = 0; + + new_bp = xfs_buf_allocate(flags); + if (unlikely(!new_bp)) + return NULL; + + bp = _xfs_buf_find(target, ioff, isize, flags, new_bp); + if (bp == new_bp) { + error = xfs_buf_allocate_memory(bp, flags); + if (error) + goto no_buffer; + } else { + xfs_buf_deallocate(new_bp); + if (unlikely(bp == NULL)) + return NULL; + } + + if (!(bp->b_flags & XBF_MAPPED)) { + error = _xfs_buf_map_pages(bp, flags); + if (unlikely(error)) { + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); + goto no_buffer; + } + } + + XFS_STATS_INC(xb_get); + + /* + * Always fill in the block number now, the mapped cases can do + * their own overlay of this later. + */ + bp->b_bn = ioff; + bp->b_count_desired = bp->b_buffer_length; + + trace_xfs_buf_get(bp, flags, _RET_IP_); + return bp; + + no_buffer: + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); + return NULL; +} + +STATIC int +_xfs_buf_read( + xfs_buf_t *bp, + xfs_buf_flags_t flags) +{ + int status; + + ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD); + + status = xfs_buf_iorequest(bp); + if (status || bp->b_error || (flags & XBF_ASYNC)) + return status; + return xfs_buf_iowait(bp); +} + +xfs_buf_t * +xfs_buf_read( + xfs_buftarg_t *target, + xfs_off_t ioff, + size_t isize, + xfs_buf_flags_t flags) +{ + xfs_buf_t *bp; + + flags |= XBF_READ; + + bp = xfs_buf_get(target, ioff, isize, flags); + if (bp) { + trace_xfs_buf_read(bp, flags, _RET_IP_); + + if (!XFS_BUF_ISDONE(bp)) { + XFS_STATS_INC(xb_get_read); + _xfs_buf_read(bp, flags); + } else if (flags & XBF_ASYNC) { + /* + * Read ahead call which is already satisfied, + * drop the buffer + */ + goto no_buffer; + } else { + /* We do not want read in the flags */ + bp->b_flags &= ~XBF_READ; + } + } + + return bp; + + no_buffer: + if (flags & (XBF_LOCK | XBF_TRYLOCK)) + xfs_buf_unlock(bp); + xfs_buf_rele(bp); + return NULL; +} + +/* + * If we are not low on memory then do the readahead in a deadlock + * safe manner. + */ +void +xfs_buf_readahead( + xfs_buftarg_t *target, + xfs_off_t ioff, + size_t isize) +{ + if (bdi_read_congested(target->bt_bdi)) + return; + + xfs_buf_read(target, ioff, isize, + XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); +} + +/* + * Read an uncached buffer from disk. Allocates and returns a locked + * buffer containing the disk contents or nothing. + */ +struct xfs_buf * +xfs_buf_read_uncached( + struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, + size_t length, + int flags) +{ + xfs_buf_t *bp; + int error; + + bp = xfs_buf_get_uncached(target, length, flags); + if (!bp) + return NULL; + + /* set up the buffer for a read IO */ + XFS_BUF_SET_ADDR(bp, daddr); + XFS_BUF_READ(bp); + + xfsbdstrat(mp, bp); + error = xfs_buf_iowait(bp); + if (error || bp->b_error) { + xfs_buf_relse(bp); + return NULL; + } + return bp; +} + +xfs_buf_t * +xfs_buf_get_empty( + size_t len, + xfs_buftarg_t *target) +{ + xfs_buf_t *bp; + + bp = xfs_buf_allocate(0); + if (bp) + _xfs_buf_initialize(bp, target, 0, len, 0); + return bp; +} + +/* + * Return a buffer allocated as an empty buffer and associated to external + * memory via xfs_buf_associate_memory() back to it's empty state. + */ +void +xfs_buf_set_empty( + struct xfs_buf *bp, + size_t len) +{ + if (bp->b_pages) + _xfs_buf_free_pages(bp); + + bp->b_pages = NULL; + bp->b_page_count = 0; + bp->b_addr = NULL; + bp->b_file_offset = 0; + bp->b_buffer_length = bp->b_count_desired = len; + bp->b_bn = XFS_BUF_DADDR_NULL; + bp->b_flags &= ~XBF_MAPPED; +} + +static inline struct page * +mem_to_page( + void *addr) +{ + if ((!is_vmalloc_addr(addr))) { + return virt_to_page(addr); + } else { + return vmalloc_to_page(addr); + } +} + +int +xfs_buf_associate_memory( + xfs_buf_t *bp, + void *mem, + size_t len) +{ + int rval; + int i = 0; + unsigned long pageaddr; + unsigned long offset; + size_t buflen; + int page_count; + + pageaddr = (unsigned long)mem & PAGE_MASK; + offset = (unsigned long)mem - pageaddr; + buflen = PAGE_ALIGN(len + offset); + page_count = buflen >> PAGE_SHIFT; + + /* Free any previous set of page pointers */ + if (bp->b_pages) + _xfs_buf_free_pages(bp); + + bp->b_pages = NULL; + bp->b_addr = mem; + + rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK); + if (rval) + return rval; + + bp->b_offset = offset; + + for (i = 0; i < bp->b_page_count; i++) { + bp->b_pages[i] = mem_to_page((void *)pageaddr); + pageaddr += PAGE_SIZE; + } + + bp->b_count_desired = len; + bp->b_buffer_length = buflen; + bp->b_flags |= XBF_MAPPED; + + return 0; +} + +xfs_buf_t * +xfs_buf_get_uncached( + struct xfs_buftarg *target, + size_t len, + int flags) +{ + unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; + int error, i; + xfs_buf_t *bp; + + bp = xfs_buf_allocate(0); + if (unlikely(bp == NULL)) + goto fail; + _xfs_buf_initialize(bp, target, 0, len, 0); + + error = _xfs_buf_get_pages(bp, page_count, 0); + if (error) + goto fail_free_buf; + + for (i = 0; i < page_count; i++) { + bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); + if (!bp->b_pages[i]) + goto fail_free_mem; + } + bp->b_flags |= _XBF_PAGES; + + error = _xfs_buf_map_pages(bp, XBF_MAPPED); + if (unlikely(error)) { + xfs_warn(target->bt_mount, + "%s: failed to map pages\n", __func__); + goto fail_free_mem; + } + + trace_xfs_buf_get_uncached(bp, _RET_IP_); + return bp; + + fail_free_mem: + while (--i >= 0) + __free_page(bp->b_pages[i]); + _xfs_buf_free_pages(bp); + fail_free_buf: + xfs_buf_deallocate(bp); + fail: + return NULL; +} + +/* + * Increment reference count on buffer, to hold the buffer concurrently + * with another thread which may release (free) the buffer asynchronously. + * Must hold the buffer already to call this function. + */ +void +xfs_buf_hold( + xfs_buf_t *bp) +{ + trace_xfs_buf_hold(bp, _RET_IP_); + atomic_inc(&bp->b_hold); +} + +/* + * Releases a hold on the specified buffer. If the + * the hold count is 1, calls xfs_buf_free. + */ +void +xfs_buf_rele( + xfs_buf_t *bp) +{ + struct xfs_perag *pag = bp->b_pag; + + trace_xfs_buf_rele(bp, _RET_IP_); + + if (!pag) { + ASSERT(list_empty(&bp->b_lru)); + ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); + if (atomic_dec_and_test(&bp->b_hold)) + xfs_buf_free(bp); + return; + } + + ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); + + ASSERT(atomic_read(&bp->b_hold) > 0); + if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { + if (!(bp->b_flags & XBF_STALE) && + atomic_read(&bp->b_lru_ref)) { + xfs_buf_lru_add(bp); + spin_unlock(&pag->pag_buf_lock); + } else { + xfs_buf_lru_del(bp); + ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); + rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); + spin_unlock(&pag->pag_buf_lock); + xfs_perag_put(pag); + xfs_buf_free(bp); + } + } +} + + +/* + * Lock a buffer object, if it is not already locked. + * + * If we come across a stale, pinned, locked buffer, we know that we are + * being asked to lock a buffer that has been reallocated. Because it is + * pinned, we know that the log has not been pushed to disk and hence it + * will still be locked. Rather than continuing to have trylock attempts + * fail until someone else pushes the log, push it ourselves before + * returning. This means that the xfsaild will not get stuck trying + * to push on stale inode buffers. + */ +int +xfs_buf_trylock( + struct xfs_buf *bp) +{ + int locked; + + locked = down_trylock(&bp->b_sema) == 0; + if (locked) + XB_SET_OWNER(bp); + else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) + xfs_log_force(bp->b_target->bt_mount, 0); + + trace_xfs_buf_trylock(bp, _RET_IP_); + return locked; +} + +/* + * Lock a buffer object. + * + * If we come across a stale, pinned, locked buffer, we know that we + * are being asked to lock a buffer that has been reallocated. Because + * it is pinned, we know that the log has not been pushed to disk and + * hence it will still be locked. Rather than sleeping until someone + * else pushes the log, push it ourselves before trying to get the lock. + */ +void +xfs_buf_lock( + struct xfs_buf *bp) +{ + trace_xfs_buf_lock(bp, _RET_IP_); + + if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) + xfs_log_force(bp->b_target->bt_mount, 0); + down(&bp->b_sema); + XB_SET_OWNER(bp); + + trace_xfs_buf_lock_done(bp, _RET_IP_); +} + +/* + * Releases the lock on the buffer object. + * If the buffer is marked delwri but is not queued, do so before we + * unlock the buffer as we need to set flags correctly. We also need to + * take a reference for the delwri queue because the unlocker is going to + * drop their's and they don't know we just queued it. + */ +void +xfs_buf_unlock( + struct xfs_buf *bp) +{ + if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { + atomic_inc(&bp->b_hold); + bp->b_flags |= XBF_ASYNC; + xfs_buf_delwri_queue(bp, 0); + } + + XB_CLEAR_OWNER(bp); + up(&bp->b_sema); + + trace_xfs_buf_unlock(bp, _RET_IP_); +} + +STATIC void +xfs_buf_wait_unpin( + xfs_buf_t *bp) +{ + DECLARE_WAITQUEUE (wait, current); + + if (atomic_read(&bp->b_pin_count) == 0) + return; + + add_wait_queue(&bp->b_waiters, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (atomic_read(&bp->b_pin_count) == 0) + break; + io_schedule(); + } + remove_wait_queue(&bp->b_waiters, &wait); + set_current_state(TASK_RUNNING); +} + +/* + * Buffer Utility Routines + */ + +STATIC void +xfs_buf_iodone_work( + struct work_struct *work) +{ + xfs_buf_t *bp = + container_of(work, xfs_buf_t, b_iodone_work); + + if (bp->b_iodone) + (*(bp->b_iodone))(bp); + else if (bp->b_flags & XBF_ASYNC) + xfs_buf_relse(bp); +} + +void +xfs_buf_ioend( + xfs_buf_t *bp, + int schedule) +{ + trace_xfs_buf_iodone(bp, _RET_IP_); + + bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD); + if (bp->b_error == 0) + bp->b_flags |= XBF_DONE; + + if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) { + if (schedule) { + INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work); + queue_work(xfslogd_workqueue, &bp->b_iodone_work); + } else { + xfs_buf_iodone_work(&bp->b_iodone_work); + } + } else { + complete(&bp->b_iowait); + } +} + +void +xfs_buf_ioerror( + xfs_buf_t *bp, + int error) +{ + ASSERT(error >= 0 && error <= 0xffff); + bp->b_error = (unsigned short)error; + trace_xfs_buf_ioerror(bp, error, _RET_IP_); +} + +int +xfs_bwrite( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + int error; + + bp->b_flags |= XBF_WRITE; + bp->b_flags &= ~(XBF_ASYNC | XBF_READ); + + xfs_buf_delwri_dequeue(bp); + xfs_bdstrat_cb(bp); + + error = xfs_buf_iowait(bp); + if (error) + xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); + xfs_buf_relse(bp); + return error; +} + +void +xfs_bdwrite( + void *mp, + struct xfs_buf *bp) +{ + trace_xfs_buf_bdwrite(bp, _RET_IP_); + + bp->b_flags &= ~XBF_READ; + bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); + + xfs_buf_delwri_queue(bp, 1); +} + +/* + * Called when we want to stop a buffer from getting written or read. + * We attach the EIO error, muck with its flags, and call xfs_buf_ioend + * so that the proper iodone callbacks get called. + */ +STATIC int +xfs_bioerror( + xfs_buf_t *bp) +{ +#ifdef XFSERRORDEBUG + ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone); +#endif + + /* + * No need to wait until the buffer is unpinned, we aren't flushing it. + */ + xfs_buf_ioerror(bp, EIO); + + /* + * We're calling xfs_buf_ioend, so delete XBF_DONE flag. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_UNDONE(bp); + XFS_BUF_STALE(bp); + + xfs_buf_ioend(bp, 0); + + return EIO; +} + +/* + * Same as xfs_bioerror, except that we are releasing the buffer + * here ourselves, and avoiding the xfs_buf_ioend call. + * This is meant for userdata errors; metadata bufs come with + * iodone functions attached, so that we can track down errors. + */ +STATIC int +xfs_bioerror_relse( + struct xfs_buf *bp) +{ + int64_t fl = bp->b_flags; + /* + * No need to wait until the buffer is unpinned. + * We aren't flushing it. + * + * chunkhold expects B_DONE to be set, whether + * we actually finish the I/O or not. We don't want to + * change that interface. + */ + XFS_BUF_UNREAD(bp); + XFS_BUF_UNDELAYWRITE(bp); + XFS_BUF_DONE(bp); + XFS_BUF_STALE(bp); + bp->b_iodone = NULL; + if (!(fl & XBF_ASYNC)) { + /* + * Mark b_error and B_ERROR _both_. + * Lot's of chunkcache code assumes that. + * There's no reason to mark error for + * ASYNC buffers. + */ + xfs_buf_ioerror(bp, EIO); + XFS_BUF_FINISH_IOWAIT(bp); + } else { + xfs_buf_relse(bp); + } + + return EIO; +} + + +/* + * All xfs metadata buffers except log state machine buffers + * get this attached as their b_bdstrat callback function. + * This is so that we can catch a buffer + * after prematurely unpinning it to forcibly shutdown the filesystem. + */ +int +xfs_bdstrat_cb( + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + /* + * Metadata write that didn't get logged but + * written delayed anyway. These aren't associated + * with a transaction, and can be ignored. + */ + if (!bp->b_iodone && !XFS_BUF_ISREAD(bp)) + return xfs_bioerror_relse(bp); + else + return xfs_bioerror(bp); + } + + xfs_buf_iorequest(bp); + return 0; +} + +/* + * Wrapper around bdstrat so that we can stop data from going to disk in case + * we are shutting down the filesystem. Typically user data goes thru this + * path; one of the exceptions is the superblock. + */ +void +xfsbdstrat( + struct xfs_mount *mp, + struct xfs_buf *bp) +{ + if (XFS_FORCED_SHUTDOWN(mp)) { + trace_xfs_bdstrat_shut(bp, _RET_IP_); + xfs_bioerror_relse(bp); + return; + } + + xfs_buf_iorequest(bp); +} + +STATIC void +_xfs_buf_ioend( + xfs_buf_t *bp, + int schedule) +{ + if (atomic_dec_and_test(&bp->b_io_remaining) == 1) + xfs_buf_ioend(bp, schedule); +} + +STATIC void +xfs_buf_bio_end_io( + struct bio *bio, + int error) +{ + xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private; + + xfs_buf_ioerror(bp, -error); + + if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ)) + invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp)); + + _xfs_buf_ioend(bp, 1); + bio_put(bio); +} + +STATIC void +_xfs_buf_ioapply( + xfs_buf_t *bp) +{ + int rw, map_i, total_nr_pages, nr_pages; + struct bio *bio; + int offset = bp->b_offset; + int size = bp->b_count_desired; + sector_t sector = bp->b_bn; + + total_nr_pages = bp->b_page_count; + map_i = 0; + + if (bp->b_flags & XBF_WRITE) { + if (bp->b_flags & XBF_SYNCIO) + rw = WRITE_SYNC; + else + rw = WRITE; + if (bp->b_flags & XBF_FUA) + rw |= REQ_FUA; + if (bp->b_flags & XBF_FLUSH) + rw |= REQ_FLUSH; + } else if (bp->b_flags & XBF_READ_AHEAD) { + rw = READA; + } else { + rw = READ; + } + + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + +next_chunk: + atomic_inc(&bp->b_io_remaining); + nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); + if (nr_pages > total_nr_pages) + nr_pages = total_nr_pages; + + bio = bio_alloc(GFP_NOIO, nr_pages); + bio->bi_bdev = bp->b_target->bt_bdev; + bio->bi_sector = sector; + bio->bi_end_io = xfs_buf_bio_end_io; + bio->bi_private = bp; + + + for (; size && nr_pages; nr_pages--, map_i++) { + int rbytes, nbytes = PAGE_SIZE - offset; + + if (nbytes > size) + nbytes = size; + + rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset); + if (rbytes < nbytes) + break; + + offset = 0; + sector += nbytes >> BBSHIFT; + size -= nbytes; + total_nr_pages--; + } + + if (likely(bio->bi_size)) { + if (xfs_buf_is_vmapped(bp)) { + flush_kernel_vmap_range(bp->b_addr, + xfs_buf_vmap_len(bp)); + } + submit_bio(rw, bio); + if (size) + goto next_chunk; + } else { + xfs_buf_ioerror(bp, EIO); + bio_put(bio); + } +} + +int +xfs_buf_iorequest( + xfs_buf_t *bp) +{ + trace_xfs_buf_iorequest(bp, _RET_IP_); + + if (bp->b_flags & XBF_DELWRI) { + xfs_buf_delwri_queue(bp, 1); + return 0; + } + + if (bp->b_flags & XBF_WRITE) { + xfs_buf_wait_unpin(bp); + } + + xfs_buf_hold(bp); + + /* Set the count to 1 initially, this will stop an I/O + * completion callout which happens before we have started + * all the I/O from calling xfs_buf_ioend too early. + */ + atomic_set(&bp->b_io_remaining, 1); + _xfs_buf_ioapply(bp); + _xfs_buf_ioend(bp, 0); + + xfs_buf_rele(bp); + return 0; +} + +/* + * Waits for I/O to complete on the buffer supplied. + * It returns immediately if no I/O is pending. + * It returns the I/O error code, if any, or 0 if there was no error. + */ +int +xfs_buf_iowait( + xfs_buf_t *bp) +{ + trace_xfs_buf_iowait(bp, _RET_IP_); + + wait_for_completion(&bp->b_iowait); + + trace_xfs_buf_iowait_done(bp, _RET_IP_); + return bp->b_error; +} + +xfs_caddr_t +xfs_buf_offset( + xfs_buf_t *bp, + size_t offset) +{ + struct page *page; + + if (bp->b_flags & XBF_MAPPED) + return bp->b_addr + offset; + + offset += bp->b_offset; + page = bp->b_pages[offset >> PAGE_SHIFT]; + return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1)); +} + +/* + * Move data into or out of a buffer. + */ +void +xfs_buf_iomove( + xfs_buf_t *bp, /* buffer to process */ + size_t boff, /* starting buffer offset */ + size_t bsize, /* length to copy */ + void *data, /* data address */ + xfs_buf_rw_t mode) /* read/write/zero flag */ +{ + size_t bend, cpoff, csize; + struct page *page; + + bend = boff + bsize; + while (boff < bend) { + page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)]; + cpoff = xfs_buf_poff(boff + bp->b_offset); + csize = min_t(size_t, + PAGE_SIZE-cpoff, bp->b_count_desired-boff); + + ASSERT(((csize + cpoff) <= PAGE_SIZE)); + + switch (mode) { + case XBRW_ZERO: + memset(page_address(page) + cpoff, 0, csize); + break; + case XBRW_READ: + memcpy(data, page_address(page) + cpoff, csize); + break; + case XBRW_WRITE: + memcpy(page_address(page) + cpoff, data, csize); + } + + boff += csize; + data += csize; + } +} + +/* + * Handling of buffer targets (buftargs). + */ + +/* + * Wait for any bufs with callbacks that have been submitted but have not yet + * returned. These buffers will have an elevated hold count, so wait on those + * while freeing all the buffers only held by the LRU. + */ +void +xfs_wait_buftarg( + struct xfs_buftarg *btp) +{ + struct xfs_buf *bp; + +restart: + spin_lock(&btp->bt_lru_lock); + while (!list_empty(&btp->bt_lru)) { + bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); + if (atomic_read(&bp->b_hold) > 1) { + spin_unlock(&btp->bt_lru_lock); + delay(100); + goto restart; + } + /* + * clear the LRU reference count so the bufer doesn't get + * ignored in xfs_buf_rele(). + */ + atomic_set(&bp->b_lru_ref, 0); + spin_unlock(&btp->bt_lru_lock); + xfs_buf_rele(bp); + spin_lock(&btp->bt_lru_lock); + } + spin_unlock(&btp->bt_lru_lock); +} + +int +xfs_buftarg_shrink( + struct shrinker *shrink, + struct shrink_control *sc) +{ + struct xfs_buftarg *btp = container_of(shrink, + struct xfs_buftarg, bt_shrinker); + struct xfs_buf *bp; + int nr_to_scan = sc->nr_to_scan; + LIST_HEAD(dispose); + + if (!nr_to_scan) + return btp->bt_lru_nr; + + spin_lock(&btp->bt_lru_lock); + while (!list_empty(&btp->bt_lru)) { + if (nr_to_scan-- <= 0) + break; + + bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); + + /* + * Decrement the b_lru_ref count unless the value is already + * zero. If the value is already zero, we need to reclaim the + * buffer, otherwise it gets another trip through the LRU. + */ + if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) { + list_move_tail(&bp->b_lru, &btp->bt_lru); + continue; + } + + /* + * remove the buffer from the LRU now to avoid needing another + * lock round trip inside xfs_buf_rele(). + */ + list_move(&bp->b_lru, &dispose); + btp->bt_lru_nr--; + } + spin_unlock(&btp->bt_lru_lock); + + while (!list_empty(&dispose)) { + bp = list_first_entry(&dispose, struct xfs_buf, b_lru); + list_del_init(&bp->b_lru); + xfs_buf_rele(bp); + } + + return btp->bt_lru_nr; +} + +void +xfs_free_buftarg( + struct xfs_mount *mp, + struct xfs_buftarg *btp) +{ + unregister_shrinker(&btp->bt_shrinker); + + xfs_flush_buftarg(btp, 1); + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_blkdev_issue_flush(btp); + + kthread_stop(btp->bt_task); + kmem_free(btp); +} + +STATIC int +xfs_setsize_buftarg_flags( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize, + int verbose) +{ + btp->bt_bsize = blocksize; + btp->bt_sshift = ffs(sectorsize) - 1; + btp->bt_smask = sectorsize - 1; + + if (set_blocksize(btp->bt_bdev, sectorsize)) { + xfs_warn(btp->bt_mount, + "Cannot set_blocksize to %u on device %s\n", + sectorsize, xfs_buf_target_name(btp)); + return EINVAL; + } + + return 0; +} + +/* + * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used is at this early stage. Play safe. + */ +STATIC int +xfs_setsize_buftarg_early( + xfs_buftarg_t *btp, + struct block_device *bdev) +{ + return xfs_setsize_buftarg_flags(btp, + PAGE_SIZE, bdev_logical_block_size(bdev), 0); +} + +int +xfs_setsize_buftarg( + xfs_buftarg_t *btp, + unsigned int blocksize, + unsigned int sectorsize) +{ + return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); +} + +STATIC int +xfs_alloc_delwrite_queue( + xfs_buftarg_t *btp, + const char *fsname) +{ + INIT_LIST_HEAD(&btp->bt_delwrite_queue); + spin_lock_init(&btp->bt_delwrite_lock); + btp->bt_flags = 0; + btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname); + if (IS_ERR(btp->bt_task)) + return PTR_ERR(btp->bt_task); + return 0; +} + +xfs_buftarg_t * +xfs_alloc_buftarg( + struct xfs_mount *mp, + struct block_device *bdev, + int external, + const char *fsname) +{ + xfs_buftarg_t *btp; + + btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); + + btp->bt_mount = mp; + btp->bt_dev = bdev->bd_dev; + btp->bt_bdev = bdev; + btp->bt_bdi = blk_get_backing_dev_info(bdev); + if (!btp->bt_bdi) + goto error; + + INIT_LIST_HEAD(&btp->bt_lru); + spin_lock_init(&btp->bt_lru_lock); + if (xfs_setsize_buftarg_early(btp, bdev)) + goto error; + if (xfs_alloc_delwrite_queue(btp, fsname)) + goto error; + btp->bt_shrinker.shrink = xfs_buftarg_shrink; + btp->bt_shrinker.seeks = DEFAULT_SEEKS; + register_shrinker(&btp->bt_shrinker); + return btp; + +error: + kmem_free(btp); + return NULL; +} + + +/* + * Delayed write buffer handling + */ +STATIC void +xfs_buf_delwri_queue( + xfs_buf_t *bp, + int unlock) +{ + struct list_head *dwq = &bp->b_target->bt_delwrite_queue; + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + + trace_xfs_buf_delwri_queue(bp, _RET_IP_); + + ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC)); + + spin_lock(dwlk); + /* If already in the queue, dequeue and place at tail */ + if (!list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + if (unlock) + atomic_dec(&bp->b_hold); + list_del(&bp->b_list); + } + + if (list_empty(dwq)) { + /* start xfsbufd as it is about to have something to do */ + wake_up_process(bp->b_target->bt_task); + } + + bp->b_flags |= _XBF_DELWRI_Q; + list_add_tail(&bp->b_list, dwq); + bp->b_queuetime = jiffies; + spin_unlock(dwlk); + + if (unlock) + xfs_buf_unlock(bp); +} + +void +xfs_buf_delwri_dequeue( + xfs_buf_t *bp) +{ + spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock; + int dequeued = 0; + + spin_lock(dwlk); + if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) { + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + list_del_init(&bp->b_list); + dequeued = 1; + } + bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q); + spin_unlock(dwlk); + + if (dequeued) + xfs_buf_rele(bp); + + trace_xfs_buf_delwri_dequeue(bp, _RET_IP_); +} + +/* + * If a delwri buffer needs to be pushed before it has aged out, then promote + * it to the head of the delwri queue so that it will be flushed on the next + * xfsbufd run. We do this by resetting the queuetime of the buffer to be older + * than the age currently needed to flush the buffer. Hence the next time the + * xfsbufd sees it is guaranteed to be considered old enough to flush. + */ +void +xfs_buf_delwri_promote( + struct xfs_buf *bp) +{ + struct xfs_buftarg *btp = bp->b_target; + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1; + + ASSERT(bp->b_flags & XBF_DELWRI); + ASSERT(bp->b_flags & _XBF_DELWRI_Q); + + /* + * Check the buffer age before locking the delayed write queue as we + * don't need to promote buffers that are already past the flush age. + */ + if (bp->b_queuetime < jiffies - age) + return; + bp->b_queuetime = jiffies - age; + spin_lock(&btp->bt_delwrite_lock); + list_move(&bp->b_list, &btp->bt_delwrite_queue); + spin_unlock(&btp->bt_delwrite_lock); +} + +STATIC void +xfs_buf_runall_queues( + struct workqueue_struct *queue) +{ + flush_workqueue(queue); +} + +/* + * Move as many buffers as specified to the supplied list + * idicating if we skipped any buffers to prevent deadlocks. + */ +STATIC int +xfs_buf_delwri_split( + xfs_buftarg_t *target, + struct list_head *list, + unsigned long age) +{ + xfs_buf_t *bp, *n; + struct list_head *dwq = &target->bt_delwrite_queue; + spinlock_t *dwlk = &target->bt_delwrite_lock; + int skipped = 0; + int force; + + force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags); + INIT_LIST_HEAD(list); + spin_lock(dwlk); + list_for_each_entry_safe(bp, n, dwq, b_list) { + ASSERT(bp->b_flags & XBF_DELWRI); + + if (!xfs_buf_ispinned(bp) && xfs_buf_trylock(bp)) { + if (!force && + time_before(jiffies, bp->b_queuetime + age)) { + xfs_buf_unlock(bp); + break; + } + + bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q); + bp->b_flags |= XBF_WRITE; + list_move_tail(&bp->b_list, list); + trace_xfs_buf_delwri_split(bp, _RET_IP_); + } else + skipped++; + } + spin_unlock(dwlk); + + return skipped; + +} + +/* + * Compare function is more complex than it needs to be because + * the return value is only 32 bits and we are doing comparisons + * on 64 bit values + */ +static int +xfs_buf_cmp( + void *priv, + struct list_head *a, + struct list_head *b) +{ + struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list); + struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list); + xfs_daddr_t diff; + + diff = ap->b_bn - bp->b_bn; + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +STATIC int +xfsbufd( + void *data) +{ + xfs_buftarg_t *target = (xfs_buftarg_t *)data; + + current->flags |= PF_MEMALLOC; + + set_freezable(); + + do { + long age = xfs_buf_age_centisecs * msecs_to_jiffies(10); + long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10); + struct list_head tmp; + struct blk_plug plug; + + if (unlikely(freezing(current))) { + set_bit(XBT_FORCE_SLEEP, &target->bt_flags); + refrigerator(); + } else { + clear_bit(XBT_FORCE_SLEEP, &target->bt_flags); + } + + /* sleep for a long time if there is nothing to do. */ + if (list_empty(&target->bt_delwrite_queue)) + tout = MAX_SCHEDULE_TIMEOUT; + schedule_timeout_interruptible(tout); + + xfs_buf_delwri_split(target, &tmp, age); + list_sort(NULL, &tmp, xfs_buf_cmp); + + blk_start_plug(&plug); + while (!list_empty(&tmp)) { + struct xfs_buf *bp; + bp = list_first_entry(&tmp, struct xfs_buf, b_list); + list_del_init(&bp->b_list); + xfs_bdstrat_cb(bp); + } + blk_finish_plug(&plug); + } while (!kthread_should_stop()); + + return 0; +} + +/* + * Go through all incore buffers, and release buffers if they belong to + * the given device. This is used in filesystem error handling to + * preserve the consistency of its metadata. + */ +int +xfs_flush_buftarg( + xfs_buftarg_t *target, + int wait) +{ + xfs_buf_t *bp; + int pincount = 0; + LIST_HEAD(tmp_list); + LIST_HEAD(wait_list); + struct blk_plug plug; + + xfs_buf_runall_queues(xfsconvertd_workqueue); + xfs_buf_runall_queues(xfsdatad_workqueue); + xfs_buf_runall_queues(xfslogd_workqueue); + + set_bit(XBT_FORCE_FLUSH, &target->bt_flags); + pincount = xfs_buf_delwri_split(target, &tmp_list, 0); + + /* + * Dropped the delayed write list lock, now walk the temporary list. + * All I/O is issued async and then if we need to wait for completion + * we do that after issuing all the IO. + */ + list_sort(NULL, &tmp_list, xfs_buf_cmp); + + blk_start_plug(&plug); + while (!list_empty(&tmp_list)) { + bp = list_first_entry(&tmp_list, struct xfs_buf, b_list); + ASSERT(target == bp->b_target); + list_del_init(&bp->b_list); + if (wait) { + bp->b_flags &= ~XBF_ASYNC; + list_add(&bp->b_list, &wait_list); + } + xfs_bdstrat_cb(bp); + } + blk_finish_plug(&plug); + + if (wait) { + /* Wait for IO to complete. */ + while (!list_empty(&wait_list)) { + bp = list_first_entry(&wait_list, struct xfs_buf, b_list); + + list_del_init(&bp->b_list); + xfs_buf_iowait(bp); + xfs_buf_relse(bp); + } + } + + return pincount; +} + +int __init +xfs_buf_init(void) +{ + xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", + KM_ZONE_HWALIGN, NULL); + if (!xfs_buf_zone) + goto out; + + xfslogd_workqueue = alloc_workqueue("xfslogd", + WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); + if (!xfslogd_workqueue) + goto out_free_buf_zone; + + xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1); + if (!xfsdatad_workqueue) + goto out_destroy_xfslogd_workqueue; + + xfsconvertd_workqueue = alloc_workqueue("xfsconvertd", + WQ_MEM_RECLAIM, 1); + if (!xfsconvertd_workqueue) + goto out_destroy_xfsdatad_workqueue; + + return 0; + + out_destroy_xfsdatad_workqueue: + destroy_workqueue(xfsdatad_workqueue); + out_destroy_xfslogd_workqueue: + destroy_workqueue(xfslogd_workqueue); + out_free_buf_zone: + kmem_zone_destroy(xfs_buf_zone); + out: + return -ENOMEM; +} + +void +xfs_buf_terminate(void) +{ + destroy_workqueue(xfsconvertd_workqueue); + destroy_workqueue(xfsdatad_workqueue); + destroy_workqueue(xfslogd_workqueue); + kmem_zone_destroy(xfs_buf_zone); +} + +#ifdef CONFIG_KDB_MODULES +struct list_head * +xfs_get_buftarg_list(void) +{ + return &xfs_buftarg_list; +} +#endif diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h new file mode 100644 index 000000000000..620972b8094d --- /dev/null +++ b/fs/xfs/xfs_buf.h @@ -0,0 +1,326 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_BUF_H__ +#define __XFS_BUF_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Base types + */ + +#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) + +#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE) +#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) +#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT) +#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK) + +typedef enum { + XBRW_READ = 1, /* transfer into target memory */ + XBRW_WRITE = 2, /* transfer from target memory */ + XBRW_ZERO = 3, /* Zero target memory */ +} xfs_buf_rw_t; + +#define XBF_READ (1 << 0) /* buffer intended for reading from device */ +#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ +#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ +#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */ +#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ +#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ +#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ +#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ + +/* I/O hints for the BIO layer */ +#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */ +#define XBF_FUA (1 << 11)/* force cache write through mode */ +#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */ + +/* flags used only as arguments to access routines */ +#define XBF_LOCK (1 << 15)/* lock requested */ +#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ +#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */ + +/* flags used only internally */ +#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ +#define _XBF_KMEM (1 << 21)/* backed by heap memory */ +#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */ + +typedef unsigned int xfs_buf_flags_t; + +#define XFS_BUF_FLAGS \ + { XBF_READ, "READ" }, \ + { XBF_WRITE, "WRITE" }, \ + { XBF_READ_AHEAD, "READ_AHEAD" }, \ + { XBF_MAPPED, "MAPPED" }, \ + { XBF_ASYNC, "ASYNC" }, \ + { XBF_DONE, "DONE" }, \ + { XBF_DELWRI, "DELWRI" }, \ + { XBF_STALE, "STALE" }, \ + { XBF_SYNCIO, "SYNCIO" }, \ + { XBF_FUA, "FUA" }, \ + { XBF_FLUSH, "FLUSH" }, \ + { XBF_LOCK, "LOCK" }, /* should never be set */\ + { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ + { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ + { _XBF_PAGES, "PAGES" }, \ + { _XBF_KMEM, "KMEM" }, \ + { _XBF_DELWRI_Q, "DELWRI_Q" } + +typedef enum { + XBT_FORCE_SLEEP = 0, + XBT_FORCE_FLUSH = 1, +} xfs_buftarg_flags_t; + +typedef struct xfs_buftarg { + dev_t bt_dev; + struct block_device *bt_bdev; + struct backing_dev_info *bt_bdi; + struct xfs_mount *bt_mount; + unsigned int bt_bsize; + unsigned int bt_sshift; + size_t bt_smask; + + /* per device delwri queue */ + struct task_struct *bt_task; + struct list_head bt_delwrite_queue; + spinlock_t bt_delwrite_lock; + unsigned long bt_flags; + + /* LRU control structures */ + struct shrinker bt_shrinker; + struct list_head bt_lru; + spinlock_t bt_lru_lock; + unsigned int bt_lru_nr; +} xfs_buftarg_t; + +struct xfs_buf; +typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); + +#define XB_PAGES 2 + +typedef struct xfs_buf { + /* + * first cacheline holds all the fields needed for an uncontended cache + * hit to be fully processed. The semaphore straddles the cacheline + * boundary, but the counter and lock sits on the first cacheline, + * which is the only bit that is touched if we hit the semaphore + * fast-path on locking. + */ + struct rb_node b_rbnode; /* rbtree node */ + xfs_off_t b_file_offset; /* offset in file */ + size_t b_buffer_length;/* size of buffer in bytes */ + atomic_t b_hold; /* reference count */ + atomic_t b_lru_ref; /* lru reclaim ref count */ + xfs_buf_flags_t b_flags; /* status flags */ + struct semaphore b_sema; /* semaphore for lockables */ + + struct list_head b_lru; /* lru list */ + wait_queue_head_t b_waiters; /* unpin waiters */ + struct list_head b_list; + struct xfs_perag *b_pag; /* contains rbtree root */ + xfs_buftarg_t *b_target; /* buffer target (device) */ + xfs_daddr_t b_bn; /* block number for I/O */ + size_t b_count_desired;/* desired transfer size */ + void *b_addr; /* virtual address of buffer */ + struct work_struct b_iodone_work; + xfs_buf_iodone_t b_iodone; /* I/O completion function */ + struct completion b_iowait; /* queue for I/O waiters */ + void *b_fspriv; + struct xfs_trans *b_transp; + struct page **b_pages; /* array of page pointers */ + struct page *b_page_array[XB_PAGES]; /* inline pages */ + unsigned long b_queuetime; /* time buffer was queued */ + atomic_t b_pin_count; /* pin count */ + atomic_t b_io_remaining; /* #outstanding I/O requests */ + unsigned int b_page_count; /* size of page array */ + unsigned int b_offset; /* page offset in first page */ + unsigned short b_error; /* error code on I/O */ +#ifdef XFS_BUF_LOCK_TRACKING + int b_last_holder; +#endif +} xfs_buf_t; + + +/* Finding and Reading Buffers */ +extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t, xfs_buf_t *); +#define xfs_incore(buftarg,blkno,len,lockit) \ + _xfs_buf_find(buftarg, blkno ,len, lockit, NULL) + +extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); +extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, + xfs_buf_flags_t); + +extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); +extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); +extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); +extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); +extern void xfs_buf_hold(xfs_buf_t *); +extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); +struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, + struct xfs_buftarg *target, + xfs_daddr_t daddr, size_t length, int flags); + +/* Releasing Buffers */ +extern void xfs_buf_free(xfs_buf_t *); +extern void xfs_buf_rele(xfs_buf_t *); + +/* Locking and Unlocking Buffers */ +extern int xfs_buf_trylock(xfs_buf_t *); +extern void xfs_buf_lock(xfs_buf_t *); +extern void xfs_buf_unlock(xfs_buf_t *); +#define xfs_buf_islocked(bp) \ + ((bp)->b_sema.count <= 0) + +/* Buffer Read and Write Routines */ +extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); +extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); + +extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); +extern int xfs_bdstrat_cb(struct xfs_buf *); + +extern void xfs_buf_ioend(xfs_buf_t *, int); +extern void xfs_buf_ioerror(xfs_buf_t *, int); +extern int xfs_buf_iorequest(xfs_buf_t *); +extern int xfs_buf_iowait(xfs_buf_t *); +extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, + xfs_buf_rw_t); +#define xfs_buf_zero(bp, off, len) \ + xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) + +static inline int xfs_buf_geterror(xfs_buf_t *bp) +{ + return bp ? bp->b_error : ENOMEM; +} + +/* Buffer Utility Routines */ +extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); + +/* Delayed Write Buffer Routines */ +extern void xfs_buf_delwri_dequeue(xfs_buf_t *); +extern void xfs_buf_delwri_promote(xfs_buf_t *); + +/* Buffer Daemon Setup Routines */ +extern int xfs_buf_init(void); +extern void xfs_buf_terminate(void); + +static inline const char * +xfs_buf_target_name(struct xfs_buftarg *target) +{ + static char __b[BDEVNAME_SIZE]; + + return bdevname(target->bt_bdev, __b); +} + + +#define XFS_BUF_ZEROFLAGS(bp) \ + ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \ + XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) + +void xfs_buf_stale(struct xfs_buf *bp); +#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); +#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE) +#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE) +#define XFS_BUF_SUPER_STALE(bp) do { \ + XFS_BUF_STALE(bp); \ + xfs_buf_delwri_dequeue(bp); \ + XFS_BUF_DONE(bp); \ + } while (0) + +#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) +#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) +#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) + +#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE) +#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE) +#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE) + +#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC) +#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) +#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) + +#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) +#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) +#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ) + +#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE) +#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) +#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) + +#define XFS_BUF_ADDR(bp) ((bp)->b_bn) +#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno)) +#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset) +#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off)) +#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired) +#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt)) +#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length) +#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt)) + +static inline void +xfs_buf_set_ref( + struct xfs_buf *bp, + int lru_ref) +{ + atomic_set(&bp->b_lru_ref, lru_ref); +} +#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref) +#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0) + +static inline int xfs_buf_ispinned(struct xfs_buf *bp) +{ + return atomic_read(&bp->b_pin_count); +} + +#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); + +static inline void xfs_buf_relse(xfs_buf_t *bp) +{ + xfs_buf_unlock(bp); + xfs_buf_rele(bp); +} + +/* + * Handling of buftargs. + */ +extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, + struct block_device *, int, const char *); +extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); +extern void xfs_wait_buftarg(xfs_buftarg_t *); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern int xfs_flush_buftarg(xfs_buftarg_t *, int); + +#ifdef CONFIG_KDB_MODULES +extern struct list_head *xfs_get_buftarg_list(void); +#endif + +#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) +#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) + +#define xfs_binval(buftarg) xfs_flush_buftarg(buftarg, 1) +#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1) + +#endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c new file mode 100644 index 000000000000..244e797dae32 --- /dev/null +++ b/fs/xfs/xfs_discard.c @@ -0,0 +1,222 @@ +/* + * Copyright (C) 2010 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_alloc_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_inode.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_discard.h" +#include "xfs_trace.h" + +STATIC int +xfs_trim_extents( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_fsblock_t start, + xfs_fsblock_t len, + xfs_fsblock_t minlen, + __uint64_t *blocks_trimmed) +{ + struct block_device *bdev = mp->m_ddev_targp->bt_bdev; + struct xfs_btree_cur *cur; + struct xfs_buf *agbp; + struct xfs_perag *pag; + int error; + int i; + + pag = xfs_perag_get(mp, agno); + + error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + if (error || !agbp) + goto out_put_perag; + + cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT); + + /* + * Force out the log. This means any transactions that might have freed + * space before we took the AGF buffer lock are now on disk, and the + * volatile disk cache is flushed. + */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* + * Look up the longest btree in the AGF and start with it. + */ + error = xfs_alloc_lookup_le(cur, 0, + XFS_BUF_TO_AGF(agbp)->agf_longest, &i); + if (error) + goto out_del_cursor; + + /* + * Loop until we are done with all extents that are large + * enough to be worth discarding. + */ + while (i) { + xfs_agblock_t fbno; + xfs_extlen_t flen; + + error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); + if (error) + goto out_del_cursor; + XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor); + ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest); + + /* + * Too small? Give up. + */ + if (flen < minlen) { + trace_xfs_discard_toosmall(mp, agno, fbno, flen); + goto out_del_cursor; + } + + /* + * If the extent is entirely outside of the range we are + * supposed to discard skip it. Do not bother to trim + * down partially overlapping ranges for now. + */ + if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start || + XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) { + trace_xfs_discard_exclude(mp, agno, fbno, flen); + goto next_extent; + } + + /* + * If any blocks in the range are still busy, skip the + * discard and try again the next time. + */ + if (xfs_alloc_busy_search(mp, agno, fbno, flen)) { + trace_xfs_discard_busy(mp, agno, fbno, flen); + goto next_extent; + } + + trace_xfs_discard_extent(mp, agno, fbno, flen); + error = -blkdev_issue_discard(bdev, + XFS_AGB_TO_DADDR(mp, agno, fbno), + XFS_FSB_TO_BB(mp, flen), + GFP_NOFS, 0); + if (error) + goto out_del_cursor; + *blocks_trimmed += flen; + +next_extent: + error = xfs_btree_decrement(cur, 0, &i); + if (error) + goto out_del_cursor; + } + +out_del_cursor: + xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + xfs_buf_relse(agbp); +out_put_perag: + xfs_perag_put(pag); + return error; +} + +int +xfs_ioc_trim( + struct xfs_mount *mp, + struct fstrim_range __user *urange) +{ + struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; + unsigned int granularity = q->limits.discard_granularity; + struct fstrim_range range; + xfs_fsblock_t start, len, minlen; + xfs_agnumber_t start_agno, end_agno, agno; + __uint64_t blocks_trimmed = 0; + int error, last_error = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (!blk_queue_discard(q)) + return -XFS_ERROR(EOPNOTSUPP); + if (copy_from_user(&range, urange, sizeof(range))) + return -XFS_ERROR(EFAULT); + + /* + * Truncating down the len isn't actually quite correct, but using + * XFS_B_TO_FSB would mean we trivially get overflows for values + * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default + * used by the fstrim application. In the end it really doesn't + * matter as trimming blocks is an advisory interface. + */ + start = XFS_B_TO_FSBT(mp, range.start); + len = XFS_B_TO_FSBT(mp, range.len); + minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen)); + + start_agno = XFS_FSB_TO_AGNO(mp, start); + if (start_agno >= mp->m_sb.sb_agcount) + return -XFS_ERROR(EINVAL); + + end_agno = XFS_FSB_TO_AGNO(mp, start + len); + if (end_agno >= mp->m_sb.sb_agcount) + end_agno = mp->m_sb.sb_agcount - 1; + + for (agno = start_agno; agno <= end_agno; agno++) { + error = -xfs_trim_extents(mp, agno, start, len, minlen, + &blocks_trimmed); + if (error) + last_error = error; + } + + if (last_error) + return last_error; + + range.len = XFS_FSB_TO_B(mp, blocks_trimmed); + if (copy_to_user(urange, &range, sizeof(range))) + return -XFS_ERROR(EFAULT); + return 0; +} + +int +xfs_discard_extents( + struct xfs_mount *mp, + struct list_head *list) +{ + struct xfs_busy_extent *busyp; + int error = 0; + + list_for_each_entry(busyp, list, list) { + trace_xfs_discard_extent(mp, busyp->agno, busyp->bno, + busyp->length); + + error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev, + XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno), + XFS_FSB_TO_BB(mp, busyp->length), + GFP_NOFS, 0); + if (error && error != EOPNOTSUPP) { + xfs_info(mp, + "discard failed for extent [0x%llu,%u], error %d", + (unsigned long long)busyp->bno, + busyp->length, + error); + return error; + } + } + + return 0; +} diff --git a/fs/xfs/xfs_discard.h b/fs/xfs/xfs_discard.h new file mode 100644 index 000000000000..344879aea646 --- /dev/null +++ b/fs/xfs/xfs_discard.h @@ -0,0 +1,10 @@ +#ifndef XFS_DISCARD_H +#define XFS_DISCARD_H 1 + +struct fstrim_range; +struct list_head; + +extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *); +extern int xfs_discard_extents(struct xfs_mount *, struct list_head *); + +#endif /* XFS_DISCARD_H */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c new file mode 100644 index 000000000000..db62959bed13 --- /dev/null +++ b/fs/xfs/xfs_dquot.c @@ -0,0 +1,1454 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + + +/* + LOCK ORDER + + inode lock (ilock) + dquot hash-chain lock (hashlock) + xqm dquot freelist lock (freelistlock + mount's dquot list lock (mplistlock) + user dquot lock - lock ordering among dquots is based on the uid or gid + group dquot lock - similar to udquots. Between the two dquots, the udquot + has to be locked first. + pin lock - the dquot lock must be held to take this lock. + flush lock - ditto. +*/ + +#ifdef DEBUG +xfs_buftarg_t *xfs_dqerror_target; +int xfs_do_dqerror; +int xfs_dqreq_num; +int xfs_dqerror_mod = 33; +#endif + +static struct lock_class_key xfs_dquot_other_class; + +/* + * Allocate and initialize a dquot. We don't always allocate fresh memory; + * we try to reclaim a free dquot if the number of incore dquots are above + * a threshold. + * The only field inside the core that gets initialized at this point + * is the d_id field. The idea is to fill in the entire q_core + * when we read in the on disk dquot. + */ +STATIC xfs_dquot_t * +xfs_qm_dqinit( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type) +{ + xfs_dquot_t *dqp; + boolean_t brandnewdquot; + + brandnewdquot = xfs_qm_dqalloc_incore(&dqp); + dqp->dq_flags = type; + dqp->q_core.d_id = cpu_to_be32(id); + dqp->q_mount = mp; + + /* + * No need to re-initialize these if this is a reclaimed dquot. + */ + if (brandnewdquot) { + INIT_LIST_HEAD(&dqp->q_freelist); + mutex_init(&dqp->q_qlock); + init_waitqueue_head(&dqp->q_pinwait); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + + trace_xfs_dqinit(dqp); + } else { + /* + * Only the q_core portion was zeroed in dqreclaim_one(). + * So, we need to reset others. + */ + dqp->q_nrefs = 0; + dqp->q_blkno = 0; + INIT_LIST_HEAD(&dqp->q_mplist); + INIT_LIST_HEAD(&dqp->q_hashlist); + dqp->q_bufoffset = 0; + dqp->q_fileoffset = 0; + dqp->q_transp = NULL; + dqp->q_gdquot = NULL; + dqp->q_res_bcount = 0; + dqp->q_res_icount = 0; + dqp->q_res_rtbcount = 0; + atomic_set(&dqp->q_pincount, 0); + dqp->q_hash = NULL; + ASSERT(list_empty(&dqp->q_freelist)); + + trace_xfs_dqreuse(dqp); + } + + /* + * In either case we need to make sure group quotas have a different + * lock class than user quotas, to make sure lockdep knows we can + * locks of one of each at the same time. + */ + if (!(type & XFS_DQ_USER)) + lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); + + /* + * log item gets initialized later + */ + return (dqp); +} + +/* + * This is called to free all the memory associated with a dquot + */ +void +xfs_qm_dqdestroy( + xfs_dquot_t *dqp) +{ + ASSERT(list_empty(&dqp->q_freelist)); + + mutex_destroy(&dqp->q_qlock); + kmem_zone_free(xfs_Gqm->qm_dqzone, dqp); + + atomic_dec(&xfs_Gqm->qm_totaldquots); +} + +/* + * This is what a 'fresh' dquot inside a dquot chunk looks like on disk. + */ +STATIC void +xfs_qm_dqinit_core( + xfs_dqid_t id, + uint type, + xfs_dqblk_t *d) +{ + /* + * Caller has zero'd the entire dquot 'chunk' already. + */ + d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); + d->dd_diskdq.d_version = XFS_DQUOT_VERSION; + d->dd_diskdq.d_id = cpu_to_be32(id); + d->dd_diskdq.d_flags = type; +} + +/* + * If default limits are in force, push them into the dquot now. + * We overwrite the dquot limits only if they are zero and this + * is not the root dquot. + */ +void +xfs_qm_adjust_dqlimits( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + xfs_quotainfo_t *q = mp->m_quotainfo; + + ASSERT(d->d_id); + + if (q->qi_bsoftlimit && !d->d_blk_softlimit) + d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); + if (q->qi_bhardlimit && !d->d_blk_hardlimit) + d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); + if (q->qi_isoftlimit && !d->d_ino_softlimit) + d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); + if (q->qi_ihardlimit && !d->d_ino_hardlimit) + d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); + if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) + d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); + if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) + d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); +} + +/* + * Check the limits and timers of a dquot and start or reset timers + * if necessary. + * This gets called even when quota enforcement is OFF, which makes our + * life a little less complicated. (We just don't reject any quota + * reservations in that case, when enforcement is off). + * We also return 0 as the values of the timers in Q_GETQUOTA calls, when + * enforcement's off. + * In contrast, warnings are a little different in that they don't + * 'automatically' get started when limits get exceeded. They do + * get reset to zero, however, when we find the count to be under + * the soft limit (they are only ever set non-zero via userspace). + */ +void +xfs_qm_adjust_dqtimers( + xfs_mount_t *mp, + xfs_disk_dquot_t *d) +{ + ASSERT(d->d_id); + +#ifdef DEBUG + if (d->d_blk_hardlimit) + ASSERT(be64_to_cpu(d->d_blk_softlimit) <= + be64_to_cpu(d->d_blk_hardlimit)); + if (d->d_ino_hardlimit) + ASSERT(be64_to_cpu(d->d_ino_softlimit) <= + be64_to_cpu(d->d_ino_hardlimit)); + if (d->d_rtb_hardlimit) + ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= + be64_to_cpu(d->d_rtb_hardlimit)); +#endif + + if (!d->d_btimer) { + if ((d->d_blk_softlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_softlimit))) || + (d->d_blk_hardlimit && + (be64_to_cpu(d->d_bcount) >= + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_btimelimit); + } else { + d->d_bwarns = 0; + } + } else { + if ((!d->d_blk_softlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_softlimit))) && + (!d->d_blk_hardlimit || + (be64_to_cpu(d->d_bcount) < + be64_to_cpu(d->d_blk_hardlimit)))) { + d->d_btimer = 0; + } + } + + if (!d->d_itimer) { + if ((d->d_ino_softlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_softlimit))) || + (d->d_ino_hardlimit && + (be64_to_cpu(d->d_icount) >= + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_itimelimit); + } else { + d->d_iwarns = 0; + } + } else { + if ((!d->d_ino_softlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_softlimit))) && + (!d->d_ino_hardlimit || + (be64_to_cpu(d->d_icount) < + be64_to_cpu(d->d_ino_hardlimit)))) { + d->d_itimer = 0; + } + } + + if (!d->d_rtbtimer) { + if ((d->d_rtb_softlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_softlimit))) || + (d->d_rtb_hardlimit && + (be64_to_cpu(d->d_rtbcount) >= + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = cpu_to_be32(get_seconds() + + mp->m_quotainfo->qi_rtbtimelimit); + } else { + d->d_rtbwarns = 0; + } + } else { + if ((!d->d_rtb_softlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_softlimit))) && + (!d->d_rtb_hardlimit || + (be64_to_cpu(d->d_rtbcount) < + be64_to_cpu(d->d_rtb_hardlimit)))) { + d->d_rtbtimer = 0; + } + } +} + +/* + * initialize a buffer full of dquots and log the whole thing + */ +STATIC void +xfs_qm_init_dquot_blk( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + xfs_buf_t *bp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + xfs_dqblk_t *d; + int curid, i; + + ASSERT(tp); + ASSERT(xfs_buf_islocked(bp)); + + d = bp->b_addr; + + /* + * ID of the first dquot in the block - id's are zero based. + */ + curid = id - (id % q->qi_dqperchunk); + ASSERT(curid >= 0); + memset(d, 0, BBTOB(q->qi_dqchunklen)); + for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) + xfs_qm_dqinit_core(curid, type, d); + xfs_trans_dquot_buf(tp, bp, + (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF : + ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF : + XFS_BLF_GDQUOT_BUF))); + xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); +} + + + +/* + * Allocate a block and fill it with dquots. + * This is called when the bmapi finds a hole. + */ +STATIC int +xfs_qm_dqalloc( + xfs_trans_t **tpp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + xfs_inode_t *quotip, + xfs_fileoff_t offset_fsb, + xfs_buf_t **O_bpp) +{ + xfs_fsblock_t firstblock; + xfs_bmap_free_t flist; + xfs_bmbt_irec_t map; + int nmaps, error, committed; + xfs_buf_t *bp; + xfs_trans_t *tp = *tpp; + + ASSERT(tp != NULL); + + trace_xfs_dqalloc(dqp); + + /* + * Initialize the bmap freelist prior to calling bmapi code. + */ + xfs_bmap_init(&flist, &firstblock); + xfs_ilock(quotip, XFS_ILOCK_EXCL); + /* + * Return if this type of quotas is turned off while we didn't + * have an inode lock + */ + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + return (ESRCH); + } + + xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL); + nmaps = 1; + if ((error = xfs_bmapi(tp, quotip, + offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB, + XFS_BMAPI_METADATA | XFS_BMAPI_WRITE, + &firstblock, + XFS_QM_DQALLOC_SPACE_RES(mp), + &map, &nmaps, &flist))) { + goto error0; + } + ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(nmaps == 1); + ASSERT((map.br_startblock != DELAYSTARTBLOCK) && + (map.br_startblock != HOLESTARTBLOCK)); + + /* + * Keep track of the blkno to save a lookup later + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + + /* now we can just get the buffer (there's nothing to read yet) */ + bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, + dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, + 0); + if (!bp || (error = xfs_buf_geterror(bp))) + goto error1; + /* + * Make a chunk of dquots out of this buffer and log + * the entire thing. + */ + xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), + dqp->dq_flags & XFS_DQ_ALLTYPES, bp); + + /* + * xfs_bmap_finish() may commit the current transaction and + * start a second transaction if the freelist is not empty. + * + * Since we still want to modify this buffer, we need to + * ensure that the buffer is not released on commit of + * the first transaction and ensure the buffer is added to the + * second transaction. + * + * If there is only one transaction then don't stop the buffer + * from being released when it commits later on. + */ + + xfs_trans_bhold(tp, bp); + + if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { + goto error1; + } + + if (committed) { + tp = *tpp; + xfs_trans_bjoin(tp, bp); + } else { + xfs_trans_bhold_release(tp, bp); + } + + *O_bpp = bp; + return 0; + + error1: + xfs_bmap_cancel(&flist); + error0: + xfs_iunlock(quotip, XFS_ILOCK_EXCL); + + return (error); +} + +/* + * Maps a dquot to the buffer containing its on-disk version. + * This returns a ptr to the buffer containing the on-disk dquot + * in the bpp param, and a ptr to the on-disk dquot within that buffer + */ +STATIC int +xfs_qm_dqtobp( + xfs_trans_t **tpp, + xfs_dquot_t *dqp, + xfs_disk_dquot_t **O_ddpp, + xfs_buf_t **O_bpp, + uint flags) +{ + xfs_bmbt_irec_t map; + int nmaps = 1, error; + xfs_buf_t *bp; + xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); + xfs_mount_t *mp = dqp->q_mount; + xfs_disk_dquot_t *ddq; + xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); + xfs_trans_t *tp = (tpp ? *tpp : NULL); + + dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; + + xfs_ilock(quotip, XFS_ILOCK_SHARED); + if (XFS_IS_THIS_QUOTA_OFF(dqp)) { + /* + * Return if this type of quotas is turned off while we + * didn't have the quota inode lock. + */ + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + return ESRCH; + } + + /* + * Find the block map; no allocations yet + */ + error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, + XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, + NULL, 0, &map, &nmaps, NULL); + + xfs_iunlock(quotip, XFS_ILOCK_SHARED); + if (error) + return error; + + ASSERT(nmaps == 1); + ASSERT(map.br_blockcount == 1); + + /* + * Offset of dquot in the (fixed sized) dquot chunk. + */ + dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * + sizeof(xfs_dqblk_t); + + ASSERT(map.br_startblock != DELAYSTARTBLOCK); + if (map.br_startblock == HOLESTARTBLOCK) { + /* + * We don't allocate unless we're asked to + */ + if (!(flags & XFS_QMOPT_DQALLOC)) + return ENOENT; + + ASSERT(tp); + error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, + dqp->q_fileoffset, &bp); + if (error) + return error; + tp = *tpp; + } else { + trace_xfs_dqtobp_read(dqp); + + /* + * store the blkno etc so that we don't have to do the + * mapping all the time + */ + dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, + 0, &bp); + if (error || !bp) + return XFS_ERROR(error); + } + + ASSERT(xfs_buf_islocked(bp)); + + /* + * calculate the location of the dquot inside the buffer. + */ + ddq = bp->b_addr + dqp->q_bufoffset; + + /* + * A simple sanity check in case we got a corrupted dquot... + */ + error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES, + flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN), + "dqtobp"); + if (error) { + if (!(flags & XFS_QMOPT_DQREPAIR)) { + xfs_trans_brelse(tp, bp); + return XFS_ERROR(EIO); + } + } + + *O_bpp = bp; + *O_ddpp = ddq; + + return (0); +} + + +/* + * Read in the ondisk dquot using dqtobp() then copy it to an incore version, + * and release the buffer immediately. + * + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqread( + xfs_trans_t **tpp, + xfs_dqid_t id, + xfs_dquot_t *dqp, /* dquot to get filled in */ + uint flags) +{ + xfs_disk_dquot_t *ddqp; + xfs_buf_t *bp; + int error; + xfs_trans_t *tp; + + ASSERT(tpp); + + trace_xfs_dqread(dqp); + + /* + * get a pointer to the on-disk dquot and the buffer containing it + * dqp already knows its own type (GROUP/USER). + */ + if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) { + return (error); + } + tp = *tpp; + + /* copy everything from disk dquot to the incore dquot */ + memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t)); + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + xfs_qm_dquot_logitem_init(dqp); + + /* + * Reservation counters are defined as reservation plus current usage + * to avoid having to add every time. + */ + dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); + dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); + dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); + + /* Mark the buf so that this will stay incore a little longer */ + XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF); + + /* + * We got the buffer with a xfs_trans_read_buf() (in dqtobp()) + * So we need to release with xfs_trans_brelse(). + * The strategy here is identical to that of inodes; we lock + * the dquot in xfs_qm_dqget() before making it accessible to + * others. This is because dquots, like inodes, need a good level of + * concurrency, and we don't want to take locks on the entire buffers + * for dquot accesses. + * Note also that the dquot buffer may even be dirty at this point, if + * this particular dquot was repaired. We still aren't afraid to + * brelse it because we have the changes incore. + */ + ASSERT(xfs_buf_islocked(bp)); + xfs_trans_brelse(tp, bp); + + return (error); +} + + +/* + * allocate an incore dquot from the kernel heap, + * and fill its core with quota information kept on disk. + * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk + * if it wasn't already allocated. + */ +STATIC int +xfs_qm_idtodq( + xfs_mount_t *mp, + xfs_dqid_t id, /* gid or uid, depending on type */ + uint type, /* UDQUOT or GDQUOT */ + uint flags, /* DQALLOC, DQREPAIR */ + xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */ +{ + xfs_dquot_t *dqp; + int error; + xfs_trans_t *tp; + int cancelflags=0; + + dqp = xfs_qm_dqinit(mp, id, type); + tp = NULL; + if (flags & XFS_QMOPT_DQALLOC) { + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); + error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp), + XFS_WRITE_LOG_RES(mp) + + BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 + + 128, + 0, + XFS_TRANS_PERM_LOG_RES, + XFS_WRITE_LOG_COUNT); + if (error) { + cancelflags = 0; + goto error0; + } + cancelflags = XFS_TRANS_RELEASE_LOG_RES; + } + + /* + * Read it from disk; xfs_dqread() takes care of + * all the necessary initialization of dquot's fields (locks, etc) + */ + if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) { + /* + * This can happen if quotas got turned off (ESRCH), + * or if the dquot didn't exist on disk and we ask to + * allocate (ENOENT). + */ + trace_xfs_dqread_fail(dqp); + cancelflags |= XFS_TRANS_ABORT; + goto error0; + } + if (tp) { + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) + goto error1; + } + + *O_dqpp = dqp; + return (0); + + error0: + ASSERT(error); + if (tp) + xfs_trans_cancel(tp, cancelflags); + error1: + xfs_qm_dqdestroy(dqp); + *O_dqpp = NULL; + return (error); +} + +/* + * Lookup a dquot in the incore dquot hashtable. We keep two separate + * hashtables for user and group dquots; and, these are global tables + * inside the XQM, not per-filesystem tables. + * The hash chain must be locked by caller, and it is left locked + * on return. Returning dquot is locked. + */ +STATIC int +xfs_qm_dqlookup( + xfs_mount_t *mp, + xfs_dqid_t id, + xfs_dqhash_t *qh, + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + uint flist_locked; + + ASSERT(mutex_is_locked(&qh->qh_lock)); + + flist_locked = B_FALSE; + + /* + * Traverse the hashchain looking for a match + */ + list_for_each_entry(dqp, &qh->qh_list, q_hashlist) { + /* + * We already have the hashlock. We don't need the + * dqlock to look at the id field of the dquot, since the + * id can't be modified without the hashlock anyway. + */ + if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) { + trace_xfs_dqlookup_found(dqp); + + /* + * All in core dquots must be on the dqlist of mp + */ + ASSERT(!list_empty(&dqp->q_mplist)); + + xfs_dqlock(dqp); + if (dqp->q_nrefs == 0) { + ASSERT(!list_empty(&dqp->q_freelist)); + if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { + trace_xfs_dqlookup_want(dqp); + + /* + * We may have raced with dqreclaim_one() + * (and lost). So, flag that we don't + * want the dquot to be reclaimed. + */ + dqp->dq_flags |= XFS_DQ_WANT; + xfs_dqunlock(dqp); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + xfs_dqlock(dqp); + dqp->dq_flags &= ~(XFS_DQ_WANT); + } + flist_locked = B_TRUE; + } + + /* + * id couldn't have changed; we had the hashlock all + * along + */ + ASSERT(be32_to_cpu(dqp->q_core.d_id) == id); + + if (flist_locked) { + if (dqp->q_nrefs != 0) { + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + flist_locked = B_FALSE; + } else { + /* take it off the freelist */ + trace_xfs_dqlookup_freelist(dqp); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + } + } + + XFS_DQHOLD(dqp); + + if (flist_locked) + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + /* + * move the dquot to the front of the hashchain + */ + ASSERT(mutex_is_locked(&qh->qh_lock)); + list_move(&dqp->q_hashlist, &qh->qh_list); + trace_xfs_dqlookup_done(dqp); + *O_dqpp = dqp; + return 0; + } + } + + *O_dqpp = NULL; + ASSERT(mutex_is_locked(&qh->qh_lock)); + return (1); +} + +/* + * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a + * a locked dquot, doing an allocation (if requested) as needed. + * When both an inode and an id are given, the inode's id takes precedence. + * That is, if the id changes while we don't hold the ilock inside this + * function, the new dquot is returned, not necessarily the one requested + * in the id argument. + */ +int +xfs_qm_dqget( + xfs_mount_t *mp, + xfs_inode_t *ip, /* locked inode (optional) */ + xfs_dqid_t id, /* uid/projid/gid depending on type */ + uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */ + uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */ + xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */ +{ + xfs_dquot_t *dqp; + xfs_dqhash_t *h; + uint version; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) || + (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) || + (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) { + return (ESRCH); + } + h = XFS_DQ_HASH(mp, id, type); + +#ifdef DEBUG + if (xfs_do_dqerror) { + if ((xfs_dqerror_target == mp->m_ddev_targp) && + (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) { + xfs_debug(mp, "Returning error in dqget"); + return (EIO); + } + } +#endif + + again: + +#ifdef DEBUG + ASSERT(type == XFS_DQ_USER || + type == XFS_DQ_PROJ || + type == XFS_DQ_GROUP); + if (ip) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (type == XFS_DQ_USER) + ASSERT(ip->i_udquot == NULL); + else + ASSERT(ip->i_gdquot == NULL); + } +#endif + mutex_lock(&h->qh_lock); + + /* + * Look in the cache (hashtable). + * The chain is kept locked during lookup. + */ + if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) { + XQM_STATS_INC(xqmstats.xs_qm_dqcachehits); + /* + * The dquot was found, moved to the front of the chain, + * taken off the freelist if it was on it, and locked + * at this point. Just unlock the hashchain and return. + */ + ASSERT(*O_dqpp); + ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp)); + mutex_unlock(&h->qh_lock); + trace_xfs_dqget_hit(*O_dqpp); + return (0); /* success */ + } + XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses); + + /* + * Dquot cache miss. We don't want to keep the inode lock across + * a (potential) disk read. Also we don't want to deal with the lock + * ordering between quotainode and this inode. OTOH, dropping the inode + * lock here means dealing with a chown that can happen before + * we re-acquire the lock. + */ + if (ip) + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * Save the hashchain version stamp, and unlock the chain, so that + * we don't keep the lock across a disk read + */ + version = h->qh_version; + mutex_unlock(&h->qh_lock); + + /* + * Allocate the dquot on the kernel heap, and read the ondisk + * portion off the disk. Also, do all the necessary initialization + * This can return ENOENT if dquot didn't exist on disk and we didn't + * ask it to allocate; ESRCH if quotas got turned off suddenly. + */ + if ((error = xfs_qm_idtodq(mp, id, type, + flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR| + XFS_QMOPT_DOWARN), + &dqp))) { + if (ip) + xfs_ilock(ip, XFS_ILOCK_EXCL); + return (error); + } + + /* + * See if this is mount code calling to look at the overall quota limits + * which are stored in the id == 0 user or group's dquot. + * Since we may not have done a quotacheck by this point, just return + * the dquot without attaching it to any hashtables, lists, etc, or even + * taking a reference. + * The caller must dqdestroy this once done. + */ + if (flags & XFS_QMOPT_DQSUSER) { + ASSERT(id == 0); + ASSERT(! ip); + goto dqret; + } + + /* + * Dquot lock comes after hashlock in the lock ordering + */ + if (ip) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * A dquot could be attached to this inode by now, since + * we had dropped the ilock. + */ + if (type == XFS_DQ_USER) { + if (!XFS_IS_UQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_udquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_udquot; + xfs_dqlock(dqp); + goto dqret; + } + } else { + if (!XFS_IS_OQUOTA_ON(mp)) { + /* inode stays locked on return */ + xfs_qm_dqdestroy(dqp); + return XFS_ERROR(ESRCH); + } + if (ip->i_gdquot) { + xfs_qm_dqdestroy(dqp); + dqp = ip->i_gdquot; + xfs_dqlock(dqp); + goto dqret; + } + } + } + + /* + * Hashlock comes after ilock in lock order + */ + mutex_lock(&h->qh_lock); + if (version != h->qh_version) { + xfs_dquot_t *tmpdqp; + /* + * Now, see if somebody else put the dquot in the + * hashtable before us. This can happen because we didn't + * keep the hashchain lock. We don't have to worry about + * lock order between the two dquots here since dqp isn't + * on any findable lists yet. + */ + if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) { + /* + * Duplicate found. Just throw away the new dquot + * and start over. + */ + xfs_qm_dqput(tmpdqp); + mutex_unlock(&h->qh_lock); + xfs_qm_dqdestroy(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dquot_dups); + goto again; + } + } + + /* + * Put the dquot at the beginning of the hash-chain and mp's list + * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock .. + */ + ASSERT(mutex_is_locked(&h->qh_lock)); + dqp->q_hash = h; + list_add(&dqp->q_hashlist, &h->qh_list); + h->qh_version++; + + /* + * Attach this dquot to this filesystem's list of all dquots, + * kept inside the mount structure in m_quotainfo field + */ + mutex_lock(&mp->m_quotainfo->qi_dqlist_lock); + + /* + * We return a locked dquot to the caller, with a reference taken + */ + xfs_dqlock(dqp); + dqp->q_nrefs = 1; + + list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist); + mp->m_quotainfo->qi_dquots++; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); + mutex_unlock(&h->qh_lock); + dqret: + ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); + trace_xfs_dqget_miss(dqp); + *O_dqpp = dqp; + return (0); +} + + +/* + * Release a reference to the dquot (decrement ref-count) + * and unlock it. If there is a group quota attached to this + * dquot, carefully release that too without tripping over + * deadlocks'n'stuff. + */ +void +xfs_qm_dqput( + xfs_dquot_t *dqp) +{ + xfs_dquot_t *gdqp; + + ASSERT(dqp->q_nrefs > 0); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + trace_xfs_dqput(dqp); + + if (dqp->q_nrefs != 1) { + dqp->q_nrefs--; + xfs_dqunlock(dqp); + return; + } + + /* + * drop the dqlock and acquire the freelist and dqlock + * in the right order; but try to get it out-of-order first + */ + if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) { + trace_xfs_dqput_wait(dqp); + xfs_dqunlock(dqp); + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + xfs_dqlock(dqp); + } + + while (1) { + gdqp = NULL; + + /* We can't depend on nrefs being == 1 here */ + if (--dqp->q_nrefs == 0) { + trace_xfs_dqput_free(dqp); + + list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist); + xfs_Gqm->qm_dqfrlist_cnt++; + + /* + * If we just added a udquot to the freelist, then + * we want to release the gdquot reference that + * it (probably) has. Otherwise it'll keep the + * gdquot from getting reclaimed. + */ + if ((gdqp = dqp->q_gdquot)) { + /* + * Avoid a recursive dqput call + */ + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + } + xfs_dqunlock(dqp); + + /* + * If we had a group quota inside the user quota as a hint, + * release it now. + */ + if (! gdqp) + break; + dqp = gdqp; + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); +} + +/* + * Release a dquot. Flush it if dirty, then dqput() it. + * dquot must not be locked. + */ +void +xfs_qm_dqrele( + xfs_dquot_t *dqp) +{ + if (!dqp) + return; + + trace_xfs_dqrele(dqp); + + xfs_dqlock(dqp); + /* + * We don't care to flush it if the dquot is dirty here. + * That will create stutters that we want to avoid. + * Instead we do a delayed write when we try to reclaim + * a dirty dquot. Also xfs_sync will take part of the burden... + */ + xfs_qm_dqput(dqp); +} + +/* + * This is the dquot flushing I/O completion routine. It is called + * from interrupt level when the buffer containing the dquot is + * flushed to disk. It is responsible for removing the dquot logitem + * from the AIL if it has not been re-logged, and unlocking the dquot's + * flush lock. This behavior is very similar to that of inodes.. + */ +STATIC void +xfs_qm_dqflush_done( + struct xfs_buf *bp, + struct xfs_log_item *lip) +{ + xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip; + xfs_dquot_t *dqp = qip->qli_dquot; + struct xfs_ail *ailp = lip->li_ailp; + + /* + * We only want to pull the item from the AIL if its + * location in the log has not changed since we started the flush. + * Thus, we only bother if the dquot's lsn has + * not changed. First we check the lsn outside the lock + * since it's cheaper, and then we recheck while + * holding the lock before removing the dquot from the AIL. + */ + if ((lip->li_flags & XFS_LI_IN_AIL) && + lip->li_lsn == qip->qli_flush_lsn) { + + /* xfs_trans_ail_delete() drops the AIL lock. */ + spin_lock(&ailp->xa_lock); + if (lip->li_lsn == qip->qli_flush_lsn) + xfs_trans_ail_delete(ailp, lip); + else + spin_unlock(&ailp->xa_lock); + } + + /* + * Release the dq's flush lock since we're done with it. + */ + xfs_dqfunlock(dqp); +} + +/* + * Write a modified dquot to disk. + * The dquot must be locked and the flush lock too taken by caller. + * The flush lock will not be unlocked until the dquot reaches the disk, + * but the dquot is free to be unlocked and modified by the caller + * in the interim. Dquot is still locked on return. This behavior is + * identical to that of inodes. + */ +int +xfs_qm_dqflush( + xfs_dquot_t *dqp, + uint flags) +{ + struct xfs_mount *mp = dqp->q_mount; + struct xfs_buf *bp; + struct xfs_disk_dquot *ddqp; + int error; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + + trace_xfs_dqflush(dqp); + + /* + * If not dirty, or it's pinned and we are not supposed to block, nada. + */ + if (!XFS_DQ_IS_DIRTY(dqp) || + (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { + xfs_dqfunlock(dqp); + return 0; + } + xfs_qm_dqunpin_wait(dqp); + + /* + * This may have been unpinned because the filesystem is shutting + * down forcibly. If that's the case we must not write this dquot + * to disk, because the log record didn't make it to disk! + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + dqp->dq_flags &= ~XFS_DQ_DIRTY; + xfs_dqfunlock(dqp); + return XFS_ERROR(EIO); + } + + /* + * Get the buffer containing the on-disk dquot + */ + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) { + ASSERT(error != ENOENT); + xfs_dqfunlock(dqp); + return error; + } + + /* + * Calculate the location of the dquot inside the buffer. + */ + ddqp = bp->b_addr + dqp->q_bufoffset; + + /* + * A simple sanity check in case we got a corrupted dquot.. + */ + error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, + XFS_QMOPT_DOWARN, "dqflush (incore copy)"); + if (error) { + xfs_buf_relse(bp); + xfs_dqfunlock(dqp); + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return XFS_ERROR(EIO); + } + + /* This is the only portion of data that needs to persist */ + memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); + + /* + * Clear the dirty field and remember the flush lsn for later use. + */ + dqp->dq_flags &= ~XFS_DQ_DIRTY; + + xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, + &dqp->q_logitem.qli_item.li_lsn); + + /* + * Attach an iodone routine so that we can remove this dquot from the + * AIL and release the flush lock once the dquot is synced to disk. + */ + xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, + &dqp->q_logitem.qli_item); + + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for too long. + */ + if (xfs_buf_ispinned(bp)) { + trace_xfs_dqflush_force(dqp); + xfs_log_force(mp, 0); + } + + if (flags & SYNC_WAIT) + error = xfs_bwrite(mp, bp); + else + xfs_bdwrite(mp, bp); + + trace_xfs_dqflush_done(dqp); + + /* + * dqp is still locked, but caller is free to unlock it now. + */ + return error; + +} + +int +xfs_qm_dqlock_nowait( + xfs_dquot_t *dqp) +{ + return mutex_trylock(&dqp->q_qlock); +} + +void +xfs_dqlock( + xfs_dquot_t *dqp) +{ + mutex_lock(&dqp->q_qlock); +} + +void +xfs_dqunlock( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); + if (dqp->q_logitem.qli_dquot == dqp) { + /* Once was dqp->q_mount, but might just have been cleared */ + xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp, + (xfs_log_item_t*)&(dqp->q_logitem)); + } +} + + +void +xfs_dqunlock_nonotify( + xfs_dquot_t *dqp) +{ + mutex_unlock(&(dqp->q_qlock)); +} + +/* + * Lock two xfs_dquot structures. + * + * To avoid deadlocks we always lock the quota structure with + * the lowerd id first. + */ +void +xfs_dqlock2( + xfs_dquot_t *d1, + xfs_dquot_t *d2) +{ + if (d1 && d2) { + ASSERT(d1 != d2); + if (be32_to_cpu(d1->q_core.d_id) > + be32_to_cpu(d2->q_core.d_id)) { + mutex_lock(&d2->q_qlock); + mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); + } else { + mutex_lock(&d1->q_qlock); + mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); + } + } else if (d1) { + mutex_lock(&d1->q_qlock); + } else if (d2) { + mutex_lock(&d2->q_qlock); + } +} + + +/* + * Take a dquot out of the mount's dqlist as well as the hashlist. + * This is called via unmount as well as quotaoff, and the purge + * will always succeed unless there are soft (temp) references + * outstanding. + * + * This returns 0 if it was purged, 1 if it wasn't. It's not an error code + * that we're returning! XXXsup - not cool. + */ +/* ARGSUSED */ +int +xfs_qm_dqpurge( + xfs_dquot_t *dqp) +{ + xfs_dqhash_t *qh = dqp->q_hash; + xfs_mount_t *mp = dqp->q_mount; + + ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock)); + ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock)); + + xfs_dqlock(dqp); + /* + * We really can't afford to purge a dquot that is + * referenced, because these are hard refs. + * It shouldn't happen in general because we went thru _all_ inodes in + * dqrele_all_inodes before calling this and didn't let the mountlock go. + * However it is possible that we have dquots with temporary + * references that are not attached to an inode. e.g. see xfs_setattr(). + */ + if (dqp->q_nrefs != 0) { + xfs_dqunlock(dqp); + mutex_unlock(&dqp->q_hash->qh_lock); + return (1); + } + + ASSERT(!list_empty(&dqp->q_freelist)); + + /* + * If we're turning off quotas, we have to make sure that, for + * example, we don't delete quota disk blocks while dquots are + * in the process of getting written to those disk blocks. + * This dquot might well be on AIL, and we can't leave it there + * if we're turning off quotas. Basically, we need this flush + * lock, and are willing to block on it. + */ + if (!xfs_dqflock_nowait(dqp)) { + /* + * Block on the flush lock after nudging dquot buffer, + * if it is incore. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + + /* + * XXXIf we're turning this type of quotas off, we don't care + * about the dirty metadata sitting in this dquot. OTOH, if + * we're unmounting, we do care, so we flush it and wait. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + + /* dqflush unlocks dqflock */ + /* + * Given that dqpurge is a very rare occurrence, it is OK + * that we're holding the hashlist and mplist locks + * across the disk write. But, ... XXXsup + * + * We don't care about getting disk errors here. We need + * to purge this dquot anyway, so we go ahead regardless. + */ + error = xfs_qm_dqflush(dqp, SYNC_WAIT); + if (error) + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); + xfs_dqflock(dqp); + } + ASSERT(atomic_read(&dqp->q_pincount) == 0); + ASSERT(XFS_FORCED_SHUTDOWN(mp) || + !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL)); + + list_del_init(&dqp->q_hashlist); + qh->qh_version++; + list_del_init(&dqp->q_mplist); + mp->m_quotainfo->qi_dqreclaims++; + mp->m_quotainfo->qi_dquots--; + /* + * XXX Move this to the front of the freelist, if we can get the + * freelist lock. + */ + ASSERT(!list_empty(&dqp->q_freelist)); + + dqp->q_mount = NULL; + dqp->q_hash = NULL; + dqp->dq_flags = XFS_DQ_INACTIVE; + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + xfs_dqfunlock(dqp); + xfs_dqunlock(dqp); + mutex_unlock(&qh->qh_lock); + return (0); +} + + +/* + * Give the buffer a little push if it is incore and + * wait on the flush lock. + */ +void +xfs_qm_dqflock_pushbuf_wait( + xfs_dquot_t *dqp) +{ + xfs_mount_t *mp = dqp->q_mount; + xfs_buf_t *bp; + + /* + * Check to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + if (!bp) + goto out_lock; + + if (XFS_BUF_ISDELAYWRITE(bp)) { + if (xfs_buf_ispinned(bp)) + xfs_log_force(mp, 0); + xfs_buf_delwri_promote(bp); + wake_up_process(bp->b_target->bt_task); + } + xfs_buf_relse(bp); +out_lock: + xfs_dqflock(dqp); +} diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h new file mode 100644 index 000000000000..34b7e945dbfa --- /dev/null +++ b/fs/xfs/xfs_dquot.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_H__ +#define __XFS_DQUOT_H__ + +/* + * Dquots are structures that hold quota information about a user or a group, + * much like inodes are for files. In fact, dquots share many characteristics + * with inodes. However, dquots can also be a centralized resource, relative + * to a collection of inodes. In this respect, dquots share some characteristics + * of the superblock. + * XFS dquots exploit both those in its algorithms. They make every attempt + * to not be a bottleneck when quotas are on and have minimal impact, if any, + * when quotas are off. + */ + +/* + * The hash chain headers (hash buckets) + */ +typedef struct xfs_dqhash { + struct list_head qh_list; + struct mutex qh_lock; + uint qh_version; /* ever increasing version */ + uint qh_nelems; /* number of dquots on the list */ +} xfs_dqhash_t; + +struct xfs_mount; +struct xfs_trans; + +/* + * The incore dquot structure + */ +typedef struct xfs_dquot { + uint dq_flags; /* various flags (XFS_DQ_*) */ + struct list_head q_freelist; /* global free list of dquots */ + struct list_head q_mplist; /* mount's list of dquots */ + struct list_head q_hashlist; /* gloabl hash list of dquots */ + xfs_dqhash_t *q_hash; /* the hashchain header */ + struct xfs_mount*q_mount; /* filesystem this relates to */ + struct xfs_trans*q_transp; /* trans this belongs to currently */ + uint q_nrefs; /* # active refs from inodes */ + xfs_daddr_t q_blkno; /* blkno of dquot buffer */ + int q_bufoffset; /* off of dq in buffer (# dquots) */ + xfs_fileoff_t q_fileoffset; /* offset in quotas file */ + + struct xfs_dquot*q_gdquot; /* group dquot, hint only */ + xfs_disk_dquot_t q_core; /* actual usage & quotas */ + xfs_dq_logitem_t q_logitem; /* dquot log item */ + xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */ + xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ + xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ + struct mutex q_qlock; /* quota lock */ + struct completion q_flush; /* flush completion queue */ + atomic_t q_pincount; /* dquot pin count */ + wait_queue_head_t q_pinwait; /* dquot pinning wait queue */ +} xfs_dquot_t; + +/* + * Lock hierarchy for q_qlock: + * XFS_QLOCK_NORMAL is the implicit default, + * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2 + */ +enum { + XFS_QLOCK_NORMAL = 0, + XFS_QLOCK_NESTED, +}; + +#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++) + +/* + * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. + */ +static inline void xfs_dqflock(xfs_dquot_t *dqp) +{ + wait_for_completion(&dqp->q_flush); +} + +static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +{ + return try_wait_for_completion(&dqp->q_flush); +} + +static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +{ + complete(&dqp->q_flush); +} + +#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock))) +#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) +#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) +#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ) +#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP) +#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo) +#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \ + XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \ + XFS_DQ_TO_QINF(dqp)->qi_gquotaip) + +#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \ + (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ + (XFS_IS_OQUOTA_ON((d)->q_mount)))) + +extern void xfs_qm_dqdestroy(xfs_dquot_t *); +extern int xfs_qm_dqflush(xfs_dquot_t *, uint); +extern int xfs_qm_dqpurge(xfs_dquot_t *); +extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); +extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); +extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); +extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, + xfs_disk_dquot_t *); +extern void xfs_qm_adjust_dqlimits(xfs_mount_t *, + xfs_disk_dquot_t *); +extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *, + xfs_dqid_t, uint, uint, xfs_dquot_t **); +extern void xfs_qm_dqput(xfs_dquot_t *); +extern void xfs_dqlock(xfs_dquot_t *); +extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *); +extern void xfs_dqunlock(xfs_dquot_t *); +extern void xfs_dqunlock_nonotify(xfs_dquot_t *); + +#endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c new file mode 100644 index 000000000000..9e0e2fa3f2c8 --- /dev/null +++ b/fs/xfs/xfs_dquot_item.c @@ -0,0 +1,529 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_dq_logitem, qli_item); +} + +/* + * returns the number of iovecs needed to log the given dquot item. + */ +STATIC uint +xfs_qm_dquot_logitem_size( + struct xfs_log_item *lip) +{ + /* + * we need only two iovecs, one for the format, one for the real thing + */ + return 2; +} + +/* + * fills in the vector of log iovecs for the given dquot log item. + */ +STATIC void +xfs_qm_dquot_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *logvec) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + + logvec->i_addr = &qlip->qli_format; + logvec->i_len = sizeof(xfs_dq_logformat_t); + logvec->i_type = XLOG_REG_TYPE_QFORMAT; + logvec++; + logvec->i_addr = &qlip->qli_dquot->q_core; + logvec->i_len = sizeof(xfs_disk_dquot_t); + logvec->i_type = XLOG_REG_TYPE_DQUOT; + + ASSERT(2 == lip->li_desc->lid_size); + qlip->qli_format.qlf_size = 2; + +} + +/* + * Increment the pin count of the given dquot. + */ +STATIC void +xfs_qm_dquot_logitem_pin( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + atomic_inc(&dqp->q_pincount); +} + +/* + * Decrement the pin count of the given dquot, and wake up + * anyone in xfs_dqwait_unpin() if the count goes to 0. The + * dquot must have been previously pinned with a call to + * xfs_qm_dquot_logitem_pin(). + */ +STATIC void +xfs_qm_dquot_logitem_unpin( + struct xfs_log_item *lip, + int remove) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(atomic_read(&dqp->q_pincount) > 0); + if (atomic_dec_and_test(&dqp->q_pincount)) + wake_up(&dqp->q_pinwait); +} + +/* + * Given the logitem, this writes the corresponding dquot entry to disk + * asynchronously. This is called with the dquot entry securely locked; + * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot + * at the end. + */ +STATIC void +xfs_qm_dquot_logitem_push( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + int error; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); + + /* + * Since we were able to lock the dquot's flush lock and + * we found it on the AIL, the dquot must be dirty. This + * is because the dquot is removed from the AIL while still + * holding the flush lock in xfs_dqflush_done(). Thus, if + * we found it in the AIL and were able to obtain the flush + * lock without sleeping, then there must not have been + * anyone in the process of flushing the dquot. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) + xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p", + __func__, error, dqp); + xfs_dqunlock(dqp); +} + +STATIC xfs_lsn_t +xfs_qm_dquot_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + /* + * We always re-log the entire dquot when it becomes dirty, + * so, the latest copy _is_ the only one that matters. + */ + return lsn; +} + +/* + * This is called to wait for the given dquot to be unpinned. + * Most of these pin/unpin routines are plagiarized from inode code. + */ +void +xfs_qm_dqunpin_wait( + struct xfs_dquot *dqp) +{ + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + if (atomic_read(&dqp->q_pincount) == 0) + return; + + /* + * Give the log a push so we don't wait here too long. + */ + xfs_log_force(dqp->q_mount, 0); + wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); +} + +/* + * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that + * the dquot is locked by us, but the flush lock isn't. So, here we are + * going to see if the relevant dquot buffer is incore, waiting on DELWRI. + * If so, we want to push it out to help us take this item off the AIL as soon + * as possible. + * + * We must not be holding the AIL lock at this point. Calling incore() to + * search the buffer cache can be a time consuming thing, and AIL lock is a + * spinlock. + */ +STATIC void +xfs_qm_dquot_logitem_pushbuf( + struct xfs_log_item *lip) +{ + struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip); + struct xfs_dquot *dqp = qlip->qli_dquot; + struct xfs_buf *bp; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * If flushlock isn't locked anymore, chances are that the + * inode flush completed and the inode was taken off the AIL. + * So, just get out. + */ + if (completion_done(&dqp->q_flush) || + !(lip->li_flags & XFS_LI_IN_AIL)) { + xfs_dqunlock(dqp); + return; + } + + bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno, + dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK); + xfs_dqunlock(dqp); + if (!bp) + return; + if (XFS_BUF_ISDELAYWRITE(bp)) + xfs_buf_delwri_promote(bp); + xfs_buf_relse(bp); +} + +/* + * This is called to attempt to lock the dquot associated with this + * dquot log item. Don't sleep on the dquot lock or the flush lock. + * If the flush lock is already held, indicating that the dquot has + * been or is in the process of being flushed, then see if we can + * find the dquot's buffer in the buffer cache without sleeping. If + * we can and it is marked delayed write, then we want to send it out. + * We delay doing so until the push routine, though, to avoid sleeping + * in any device strategy routines. + */ +STATIC uint +xfs_qm_dquot_logitem_trylock( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + if (atomic_read(&dqp->q_pincount) > 0) + return XFS_ITEM_PINNED; + + if (!xfs_qm_dqlock_nowait(dqp)) + return XFS_ITEM_LOCKED; + + if (!xfs_dqflock_nowait(dqp)) { + /* + * dquot has already been flushed to the backing buffer, + * leave it locked, pushbuf routine will unlock it. + */ + return XFS_ITEM_PUSHBUF; + } + + ASSERT(lip->li_flags & XFS_LI_IN_AIL); + return XFS_ITEM_SUCCESS; +} + +/* + * Unlock the dquot associated with the log item. + * Clear the fields of the dquot and dquot log item that + * are specific to the current transaction. If the + * hold flags is set, do not unlock the dquot. + */ +STATIC void +xfs_qm_dquot_logitem_unlock( + struct xfs_log_item *lip) +{ + struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + /* + * Clear the transaction pointer in the dquot + */ + dqp->q_transp = NULL; + + /* + * dquots are never 'held' from getting unlocked at the end of + * a transaction. Their locking and unlocking is hidden inside the + * transaction layer, within trans_commit. Hence, no LI_HOLD flag + * for the logitem. + */ + xfs_dqunlock(dqp); +} + +/* + * this needs to stamp an lsn into the dquot, I think. + * rpc's that look at user dquot's would then have to + * push on the dependency recorded in the dquot + */ +STATIC void +xfs_qm_dquot_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ +} + +/* + * This is the ops vector for dquots + */ +static struct xfs_item_ops xfs_dquot_item_ops = { + .iop_size = xfs_qm_dquot_logitem_size, + .iop_format = xfs_qm_dquot_logitem_format, + .iop_pin = xfs_qm_dquot_logitem_pin, + .iop_unpin = xfs_qm_dquot_logitem_unpin, + .iop_trylock = xfs_qm_dquot_logitem_trylock, + .iop_unlock = xfs_qm_dquot_logitem_unlock, + .iop_committed = xfs_qm_dquot_logitem_committed, + .iop_push = xfs_qm_dquot_logitem_push, + .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf, + .iop_committing = xfs_qm_dquot_logitem_committing +}; + +/* + * Initialize the dquot log item for a newly allocated dquot. + * The dquot isn't locked at this point, but it isn't on any of the lists + * either, so we don't care. + */ +void +xfs_qm_dquot_logitem_init( + struct xfs_dquot *dqp) +{ + struct xfs_dq_logitem *lp = &dqp->q_logitem; + + xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT, + &xfs_dquot_item_ops); + lp->qli_dquot = dqp; + lp->qli_format.qlf_type = XFS_LI_DQUOT; + lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); + lp->qli_format.qlf_blkno = dqp->q_blkno; + lp->qli_format.qlf_len = 1; + /* + * This is just the offset of this dquot within its buffer + * (which is currently 1 FSB and probably won't change). + * Hence 32 bits for this offset should be just fine. + * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) + * here, and recompute it at recovery time. + */ + lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset; +} + +/*------------------ QUOTAOFF LOG ITEMS -------------------*/ + +static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_qoff_logitem, qql_item); +} + + +/* + * This returns the number of iovecs needed to log the given quotaoff item. + * We only need 1 iovec for an quotaoff item. It just logs the + * quotaoff_log_format structure. + */ +STATIC uint +xfs_qm_qoff_logitem_size( + struct xfs_log_item *lip) +{ + return 1; +} + +/* + * This is called to fill in the vector of log iovecs for the + * given quotaoff log item. We use only 1 iovec, and we point that + * at the quotaoff_log_format structure embedded in the quotaoff item. + * It is at this point that we assert that all of the extent + * slots in the quotaoff item have been filled. + */ +STATIC void +xfs_qm_qoff_logitem_format( + struct xfs_log_item *lip, + struct xfs_log_iovec *log_vector) +{ + struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip); + + ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); + + log_vector->i_addr = &qflip->qql_format; + log_vector->i_len = sizeof(xfs_qoff_logitem_t); + log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; + qflip->qql_format.qf_size = 1; +} + +/* + * Pinning has no meaning for an quotaoff item, so just return. + */ +STATIC void +xfs_qm_qoff_logitem_pin( + struct xfs_log_item *lip) +{ +} + +/* + * Since pinning has no meaning for an quotaoff item, unpinning does + * not either. + */ +STATIC void +xfs_qm_qoff_logitem_unpin( + struct xfs_log_item *lip, + int remove) +{ +} + +/* + * Quotaoff items have no locking, so just return success. + */ +STATIC uint +xfs_qm_qoff_logitem_trylock( + struct xfs_log_item *lip) +{ + return XFS_ITEM_LOCKED; +} + +/* + * Quotaoff items have no locking or pushing, so return failure + * so that the caller doesn't bother with us. + */ +STATIC void +xfs_qm_qoff_logitem_unlock( + struct xfs_log_item *lip) +{ +} + +/* + * The quotaoff-start-item is logged only once and cannot be moved in the log, + * so simply return the lsn at which it's been logged. + */ +STATIC xfs_lsn_t +xfs_qm_qoff_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + return lsn; +} + +/* + * There isn't much you can do to push on an quotaoff item. It is simply + * stuck waiting for the log to be flushed to disk. + */ +STATIC void +xfs_qm_qoff_logitem_push( + struct xfs_log_item *lip) +{ +} + + +STATIC xfs_lsn_t +xfs_qm_qoffend_logitem_committed( + struct xfs_log_item *lip, + xfs_lsn_t lsn) +{ + struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip); + struct xfs_qoff_logitem *qfs = qfe->qql_start_lip; + struct xfs_ail *ailp = qfs->qql_item.li_ailp; + + /* + * Delete the qoff-start logitem from the AIL. + * xfs_trans_ail_delete() drops the AIL lock. + */ + spin_lock(&ailp->xa_lock); + xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs); + + kmem_free(qfs); + kmem_free(qfe); + return (xfs_lsn_t)-1; +} + +/* + * XXX rcc - don't know quite what to do with this. I think we can + * just ignore it. The only time that isn't the case is if we allow + * the client to somehow see that quotas have been turned off in which + * we can't allow that to get back until the quotaoff hits the disk. + * So how would that happen? Also, do we need different routines for + * quotaoff start and quotaoff end? I suspect the answer is yes but + * to be sure, I need to look at the recovery code and see how quota off + * recovery is handled (do we roll forward or back or do something else). + * If we roll forwards or backwards, then we need two separate routines, + * one that does nothing and one that stamps in the lsn that matters + * (truly makes the quotaoff irrevocable). If we do something else, + * then maybe we don't need two. + */ +STATIC void +xfs_qm_qoff_logitem_committing( + struct xfs_log_item *lip, + xfs_lsn_t commit_lsn) +{ +} + +static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoffend_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing +}; + +/* + * This is the ops vector shared by all quotaoff-start log items. + */ +static struct xfs_item_ops xfs_qm_qoff_logitem_ops = { + .iop_size = xfs_qm_qoff_logitem_size, + .iop_format = xfs_qm_qoff_logitem_format, + .iop_pin = xfs_qm_qoff_logitem_pin, + .iop_unpin = xfs_qm_qoff_logitem_unpin, + .iop_trylock = xfs_qm_qoff_logitem_trylock, + .iop_unlock = xfs_qm_qoff_logitem_unlock, + .iop_committed = xfs_qm_qoff_logitem_committed, + .iop_push = xfs_qm_qoff_logitem_push, + .iop_committing = xfs_qm_qoff_logitem_committing +}; + +/* + * Allocate and initialize an quotaoff item of the correct quota type(s). + */ +struct xfs_qoff_logitem * +xfs_qm_qoff_logitem_init( + struct xfs_mount *mp, + struct xfs_qoff_logitem *start, + uint flags) +{ + struct xfs_qoff_logitem *qf; + + qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP); + + xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ? + &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops); + qf->qql_item.li_mountp = mp; + qf->qql_format.qf_type = XFS_LI_QUOTAOFF; + qf->qql_format.qf_flags = flags; + qf->qql_start_lip = start; + return qf; +} diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h new file mode 100644 index 000000000000..5acae2ada70b --- /dev/null +++ b/fs/xfs/xfs_dquot_item.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_DQUOT_ITEM_H__ +#define __XFS_DQUOT_ITEM_H__ + +struct xfs_dquot; +struct xfs_trans; +struct xfs_mount; +struct xfs_qoff_logitem; + +typedef struct xfs_dq_logitem { + xfs_log_item_t qli_item; /* common portion */ + struct xfs_dquot *qli_dquot; /* dquot ptr */ + xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ + xfs_dq_logformat_t qli_format; /* logged structure */ +} xfs_dq_logitem_t; + +typedef struct xfs_qoff_logitem { + xfs_log_item_t qql_item; /* common portion */ + struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ + xfs_qoff_logformat_t qql_format; /* logged structure */ +} xfs_qoff_logitem_t; + + +extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *); +extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *, + struct xfs_qoff_logitem *, uint); +extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *, uint); +extern void xfs_trans_log_quotaoff_item(struct xfs_trans *, + struct xfs_qoff_logitem *); + +#endif /* __XFS_DQUOT_ITEM_H__ */ diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c new file mode 100644 index 000000000000..75e5d322e48f --- /dev/null +++ b/fs/xfs/xfs_export.c @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_types.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_mount.h" +#include "xfs_export.h" +#include "xfs_vnodeops.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_trace.h" + +/* + * Note that we only accept fileids which are long enough rather than allow + * the parent generation number to default to zero. XFS considers zero a + * valid generation number not an invalid/wildcard value. + */ +static int xfs_fileid_length(int fileid_type) +{ + switch (fileid_type) { + case FILEID_INO32_GEN: + return 2; + case FILEID_INO32_GEN_PARENT: + return 4; + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + return 3; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + return 6; + } + return 255; /* invalid */ +} + +STATIC int +xfs_fs_encode_fh( + struct dentry *dentry, + __u32 *fh, + int *max_len, + int connectable) +{ + struct fid *fid = (struct fid *)fh; + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; + struct inode *inode = dentry->d_inode; + int fileid_type; + int len; + + /* Directories don't need their parent encoded, they have ".." */ + if (S_ISDIR(inode->i_mode) || !connectable) + fileid_type = FILEID_INO32_GEN; + else + fileid_type = FILEID_INO32_GEN_PARENT; + + /* + * If the the filesystem may contain 64bit inode numbers, we need + * to use larger file handles that can represent them. + * + * While we only allocate inodes that do not fit into 32 bits any + * large enough filesystem may contain them, thus the slightly + * confusing looking conditional below. + */ + if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) || + (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES)) + fileid_type |= XFS_FILEID_TYPE_64FLAG; + + /* + * Only encode if there is enough space given. In practice + * this means we can't export a filesystem with 64bit inodes + * over NFSv2 with the subtree_check export option; the other + * seven combinations work. The real answer is "don't use v2". + */ + len = xfs_fileid_length(fileid_type); + if (*max_len < len) { + *max_len = len; + return 255; + } + *max_len = len; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + spin_lock(&dentry->d_lock); + fid->i32.parent_ino = dentry->d_parent->d_inode->i_ino; + fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); + /*FALLTHRU*/ + case FILEID_INO32_GEN: + fid->i32.ino = inode->i_ino; + fid->i32.gen = inode->i_generation; + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + spin_lock(&dentry->d_lock); + fid64->parent_ino = dentry->d_parent->d_inode->i_ino; + fid64->parent_gen = dentry->d_parent->d_inode->i_generation; + spin_unlock(&dentry->d_lock); + /*FALLTHRU*/ + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + fid64->ino = inode->i_ino; + fid64->gen = inode->i_generation; + break; + } + + return fileid_type; +} + +STATIC struct inode * +xfs_nfs_get_inode( + struct super_block *sb, + u64 ino, + u32 generation) + { + xfs_mount_t *mp = XFS_M(sb); + xfs_inode_t *ip; + int error; + + /* + * NFS can sometimes send requests for ino 0. Fail them gracefully. + */ + if (ino == 0) + return ERR_PTR(-ESTALE); + + /* + * The XFS_IGET_UNTRUSTED means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem as clients can + * send invalid file handles and we have to handle it gracefully.. + */ + error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip); + if (error) { + /* + * EINVAL means the inode cluster doesn't exist anymore. + * This implies the filehandle is stale, so we should + * translate it here. + * We don't use ESTALE directly down the chain to not + * confuse applications using bulkstat that expect EINVAL. + */ + if (error == EINVAL || error == ENOENT) + error = ESTALE; + return ERR_PTR(-error); + } + + if (ip->i_d.di_gen != generation) { + IRELE(ip); + return ERR_PTR(-ESTALE); + } + + return VFS_I(ip); +} + +STATIC struct dentry * +xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + + if (fh_len < xfs_fileid_length(fileid_type)) + return NULL; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + case FILEID_INO32_GEN: + inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen); + break; + } + + return d_obtain_alias(inode); +} + +STATIC struct dentry * +xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fileid_type) +{ + struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; + struct inode *inode = NULL; + + switch (fileid_type) { + case FILEID_INO32_GEN_PARENT: + inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino, + fid->i32.parent_gen); + break; + case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: + inode = xfs_nfs_get_inode(sb, fid64->parent_ino, + fid64->parent_gen); + break; + } + + return d_obtain_alias(inode); +} + +STATIC struct dentry * +xfs_fs_get_parent( + struct dentry *child) +{ + int error; + struct xfs_inode *cip; + + error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); + if (unlikely(error)) + return ERR_PTR(-error); + + return d_obtain_alias(VFS_I(cip)); +} + +STATIC int +xfs_fs_nfs_commit_metadata( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + int error = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_ipincount(ip)) { + error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, NULL); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + return error; +} + +const struct export_operations xfs_export_operations = { + .encode_fh = xfs_fs_encode_fh, + .fh_to_dentry = xfs_fs_fh_to_dentry, + .fh_to_parent = xfs_fs_fh_to_parent, + .get_parent = xfs_fs_get_parent, + .commit_metadata = xfs_fs_nfs_commit_metadata, +}; diff --git a/fs/xfs/xfs_export.h b/fs/xfs/xfs_export.h new file mode 100644 index 000000000000..3272b6ae7a35 --- /dev/null +++ b/fs/xfs/xfs_export.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_EXPORT_H__ +#define __XFS_EXPORT_H__ + +/* + * Common defines for code related to exporting XFS filesystems over NFS. + * + * The NFS fileid goes out on the wire as an array of + * 32bit unsigned ints in host order. There are 5 possible + * formats. + * + * (1) fileid_type=0x00 + * (no fileid data; handled by the generic code) + * + * (2) fileid_type=0x01 + * inode-num + * generation + * + * (3) fileid_type=0x02 + * inode-num + * generation + * parent-inode-num + * parent-generation + * + * (4) fileid_type=0x81 + * inode-num-lo32 + * inode-num-hi32 + * generation + * + * (5) fileid_type=0x82 + * inode-num-lo32 + * inode-num-hi32 + * generation + * parent-inode-num-lo32 + * parent-inode-num-hi32 + * parent-generation + * + * Note, the NFS filehandle also includes an fsid portion which + * may have an inode number in it. That number is hardcoded to + * 32bits and there is no way for XFS to intercept it. In + * practice this means when exporting an XFS filesystem with 64bit + * inodes you should either export the mountpoint (rather than + * a subdirectory) or use the "fsid" export option. + */ + +struct xfs_fid64 { + u64 ino; + u32 gen; + u64 parent_ino; + u32 parent_gen; +} __attribute__((packed)); + +/* This flag goes on the wire. Don't play with it. */ +#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */ + +#endif /* __XFS_EXPORT_H__ */ diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c new file mode 100644 index 000000000000..7f7b42469ea7 --- /dev/null +++ b/fs/xfs/xfs_file.c @@ -0,0 +1,1096 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_trans.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_inode_item.h" +#include "xfs_bmap.h" +#include "xfs_error.h" +#include "xfs_vnodeops.h" +#include "xfs_da_btree.h" +#include "xfs_ioctl.h" +#include "xfs_trace.h" + +#include +#include + +static const struct vm_operations_struct xfs_file_vm_ops; + +/* + * Locking primitives for read and write IO paths to ensure we consistently use + * and order the inode->i_mutex, ip->i_lock and ip->i_iolock. + */ +static inline void +xfs_rw_ilock( + struct xfs_inode *ip, + int type) +{ + if (type & XFS_IOLOCK_EXCL) + mutex_lock(&VFS_I(ip)->i_mutex); + xfs_ilock(ip, type); +} + +static inline void +xfs_rw_iunlock( + struct xfs_inode *ip, + int type) +{ + xfs_iunlock(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + +static inline void +xfs_rw_ilock_demote( + struct xfs_inode *ip, + int type) +{ + xfs_ilock_demote(ip, type); + if (type & XFS_IOLOCK_EXCL) + mutex_unlock(&VFS_I(ip)->i_mutex); +} + +/* + * xfs_iozero + * + * xfs_iozero clears the specified range of buffer supplied, + * and marks all the affected blocks as valid and modified. If + * an affected block is not allocated, it will be allocated. If + * an affected block is not completely overwritten, and is not + * valid before the operation, it will be read from disk before + * being partially zeroed. + */ +STATIC int +xfs_iozero( + struct xfs_inode *ip, /* inode */ + loff_t pos, /* offset in file */ + size_t count) /* size of data to zero */ +{ + struct page *page; + struct address_space *mapping; + int status; + + mapping = VFS_I(ip)->i_mapping; + do { + unsigned offset, bytes; + void *fsdata; + + offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */ + bytes = PAGE_CACHE_SIZE - offset; + if (bytes > count) + bytes = count; + + status = pagecache_write_begin(NULL, mapping, pos, bytes, + AOP_FLAG_UNINTERRUPTIBLE, + &page, &fsdata); + if (status) + break; + + zero_user(page, offset, bytes); + + status = pagecache_write_end(NULL, mapping, pos, bytes, bytes, + page, fsdata); + WARN_ON(status <= 0); /* can't return less than zero! */ + pos += bytes; + count -= bytes; + status = 0; + } while (count); + + return (-status); +} + +STATIC int +xfs_file_fsync( + struct file *file, + loff_t start, + loff_t end, + int datasync) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error = 0; + int log_flushed = 0; + + trace_xfs_file_fsync(ip); + + error = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (error) + return error; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + + xfs_ilock(ip, XFS_IOLOCK_SHARED); + xfs_ioend_wait(ip); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + if (mp->m_flags & XFS_MOUNT_BARRIER) { + /* + * If we have an RT and/or log subvolume we need to make sure + * to flush the write cache the device used for file data + * first. This is to ensure newly written file data make + * it to disk before logging the new inode size in case of + * an extending write. + */ + if (XFS_IS_REALTIME_INODE(ip)) + xfs_blkdev_issue_flush(mp->m_rtdev_targp); + else if (mp->m_logdev_targp != mp->m_ddev_targp) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + } + + /* + * We always need to make sure that the required inode state is safe on + * disk. The inode might be clean but we still might need to force the + * log because of committed transactions that haven't hit the disk yet. + * Likewise, there could be unflushed non-transactional changes to the + * inode core that have to go to disk and this requires us to issue + * a synchronous transaction to capture these changes correctly. + * + * This code relies on the assumption that if the i_update_core field + * of the inode is clear and the inode is unpinned then it is clean + * and no action is required. + */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + + /* + * First check if the VFS inode is marked dirty. All the dirtying + * of non-transactional updates no goes through mark_inode_dirty*, + * which allows us to distinguish beteeen pure timestamp updates + * and i_size updates which need to be caught for fdatasync. + * After that also theck for the dirty state in the XFS inode, which + * might gets cleared when the inode gets written out via the AIL + * or xfs_iflush_cluster. + */ + if (((inode->i_state & I_DIRTY_DATASYNC) || + ((inode->i_state & I_DIRTY_SYNC) && !datasync)) && + ip->i_update_core) { + /* + * Kick off a transaction to log the inode core to get the + * updates. The sync transaction will also force the log. + */ + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, + XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return -error; + } + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out + * of the way during trans_reserve which would flush the inode. + * But there's no guarantee that the inode buffer has actually + * gone out yet (it's delwri). Plus the buffer could be pinned + * anyway if it's part of an inode in another recent + * transaction. So we play it safe and fire off the + * transaction anyway. + */ + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_trans_set_sync(tp); + error = _xfs_trans_commit(tp, 0, &log_flushed); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } else { + /* + * Timestamps/size haven't changed since last inode flush or + * inode transaction commit. That means either nothing got + * written or a transaction committed which caught the updates. + * If the latter happened and the transaction hasn't hit the + * disk yet, the inode will be still be pinned. If it is, + * force the log. + */ + if (xfs_ipincount(ip)) { + error = _xfs_log_force_lsn(mp, + ip->i_itemp->ili_last_lsn, + XFS_LOG_SYNC, &log_flushed); + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + } + + /* + * If we only have a single device, and the log force about was + * a no-op we might have to flush the data device cache here. + * This can only happen for fdatasync/O_DSYNC if we were overwriting + * an already allocated file and thus do not have any metadata to + * commit. + */ + if ((mp->m_flags & XFS_MOUNT_BARRIER) && + mp->m_logdev_targp == mp->m_ddev_targp && + !XFS_IS_REALTIME_INODE(ip) && + !log_flushed) + xfs_blkdev_issue_flush(mp->m_ddev_targp); + + return -error; +} + +STATIC ssize_t +xfs_file_aio_read( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + size_t size = 0; + ssize_t ret = 0; + int ioflags = 0; + xfs_fsize_t n; + unsigned long seg; + + XFS_STATS_INC(xs_read_calls); + + BUG_ON(iocb->ki_pos != pos); + + if (unlikely(file->f_flags & O_DIRECT)) + ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + /* START copy & waste from filemap.c */ + for (seg = 0; seg < nr_segs; seg++) { + const struct iovec *iv = &iovp[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + size += iv->iov_len; + if (unlikely((ssize_t)(size|iv->iov_len) < 0)) + return XFS_ERROR(-EINVAL); + } + /* END copy & waste from filemap.c */ + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_buftarg_t *target = + XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + if ((iocb->ki_pos & target->bt_smask) || + (size & target->bt_smask)) { + if (iocb->ki_pos == ip->i_size) + return 0; + return -XFS_ERROR(EINVAL); + } + } + + n = XFS_MAXIOFFSET(mp) - iocb->ki_pos; + if (n <= 0 || size == 0) + return 0; + + if (n < size) + size = n; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + if (unlikely(ioflags & IO_ISDIRECT)) { + xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); + + if (inode->i_mapping->nrpages) { + ret = -xfs_flushinval_pages(ip, + (iocb->ki_pos & PAGE_CACHE_MASK), + -1, FI_REMAPF_LOCKED); + if (ret) { + xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; + } + } + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + } else + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + + trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags); + + ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} + +STATIC ssize_t +xfs_file_splice_read( + struct file *infilp, + loff_t *ppos, + struct pipe_inode_info *pipe, + size_t count, + unsigned int flags) +{ + struct xfs_inode *ip = XFS_I(infilp->f_mapping->host); + int ioflags = 0; + ssize_t ret; + + XFS_STATS_INC(xs_read_calls); + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_rw_ilock(ip, XFS_IOLOCK_SHARED); + + trace_xfs_file_splice_read(ip, count, *ppos, ioflags); + + ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); + if (ret > 0) + XFS_STATS_ADD(xs_read_bytes, ret); + + xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); + return ret; +} + +STATIC void +xfs_aio_write_isize_update( + struct inode *inode, + loff_t *ppos, + ssize_t bytes_written) +{ + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t isize = i_size_read(inode); + + if (bytes_written > 0) + XFS_STATS_ADD(xs_write_bytes, bytes_written); + + if (unlikely(bytes_written < 0 && bytes_written != -EFAULT && + *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_size) { + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_size) + ip->i_size = *ppos; + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +/* + * If this was a direct or synchronous I/O that failed (such as ENOSPC) then + * part of the I/O may have been written to disk before the error occurred. In + * this case the on-disk file size may have been adjusted beyond the in-memory + * file size and now needs to be truncated back. + */ +STATIC void +xfs_aio_write_newsize_update( + struct xfs_inode *ip) +{ + if (ip->i_new_size) { + xfs_rw_ilock(ip, XFS_ILOCK_EXCL); + ip->i_new_size = 0; + if (ip->i_d.di_size > ip->i_size) + ip->i_d.di_size = ip->i_size; + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + } +} + +/* + * xfs_file_splice_write() does not use xfs_rw_ilock() because + * generic_file_splice_write() takes the i_mutex itself. This, in theory, + * couuld cause lock inversions between the aio_write path and the splice path + * if someone is doing concurrent splice(2) based writes and write(2) based + * writes to the same inode. The only real way to fix this is to re-implement + * the generic code here with correct locking orders. + */ +STATIC ssize_t +xfs_file_splice_write( + struct pipe_inode_info *pipe, + struct file *outfilp, + loff_t *ppos, + size_t count, + unsigned int flags) +{ + struct inode *inode = outfilp->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t new_size; + int ioflags = 0; + ssize_t ret; + + XFS_STATS_INC(xs_write_calls); + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + new_size = *ppos + count; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (new_size > ip->i_size) + ip->i_new_size = new_size; + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + trace_xfs_file_splice_write(ip, count, *ppos, ioflags); + + ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); + + xfs_aio_write_isize_update(inode, ppos, ret); + xfs_aio_write_newsize_update(ip); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return ret; +} + +/* + * This routine is called to handle zeroing any space in the last + * block of the file that is beyond the EOF. We do this since the + * size is being increased without writing anything to that block + * and we don't want anyone to read the garbage on the disk. + */ +STATIC int /* error (positive) */ +xfs_zero_last_block( + xfs_inode_t *ip, + xfs_fsize_t offset, + xfs_fsize_t isize) +{ + xfs_fileoff_t last_fsb; + xfs_mount_t *mp = ip->i_mount; + int nimaps; + int zero_offset; + int zero_len; + int error = 0; + xfs_bmbt_irec_t imap; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + zero_offset = XFS_B_FSB_OFFSET(mp, isize); + if (zero_offset == 0) { + /* + * There are no extra bytes in the last block on disk to + * zero, so return. + */ + return 0; + } + + last_fsb = XFS_B_TO_FSBT(mp, isize); + nimaps = 1; + error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap, + &nimaps, NULL); + if (error) { + return error; + } + ASSERT(nimaps > 0); + /* + * If the block underlying isize is just a hole, then there + * is nothing to zero. + */ + if (imap.br_startblock == HOLESTARTBLOCK) { + return 0; + } + /* + * Zero the part of the last block beyond the EOF, and write it + * out sync. We need to drop the ilock while we do this so we + * don't deadlock when the buffer cache calls back to us. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + zero_len = mp->m_sb.sb_blocksize - zero_offset; + if (isize + zero_len > offset) + zero_len = offset - isize; + error = xfs_iozero(ip, isize, zero_len); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(error >= 0); + return error; +} + +/* + * Zero any on disk space between the current EOF and the new, + * larger EOF. This handles the normal case of zeroing the remainder + * of the last block in the file and the unusual case of zeroing blocks + * out beyond the size of the file. This second case only happens + * with fixed size extents and when the system crashes before the inode + * size was updated but after blocks were allocated. If fill is set, + * then any holes in the range are filled and zeroed. If not, the holes + * are left alone as holes. + */ + +int /* error (positive) */ +xfs_zero_eof( + xfs_inode_t *ip, + xfs_off_t offset, /* starting I/O offset */ + xfs_fsize_t isize) /* current inode size */ +{ + xfs_mount_t *mp = ip->i_mount; + xfs_fileoff_t start_zero_fsb; + xfs_fileoff_t end_zero_fsb; + xfs_fileoff_t zero_count_fsb; + xfs_fileoff_t last_fsb; + xfs_fileoff_t zero_off; + xfs_fsize_t zero_len; + int nimaps; + int error = 0; + xfs_bmbt_irec_t imap; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + ASSERT(offset > isize); + + /* + * First handle zeroing the block on which isize resides. + * We only zero a part of that block so it is handled specially. + */ + error = xfs_zero_last_block(ip, offset, isize); + if (error) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + return error; + } + + /* + * Calculate the range between the new size and the old + * where blocks needing to be zeroed may exist. To get the + * block where the last byte in the file currently resides, + * we need to subtract one from the size and truncate back + * to a block boundary. We subtract 1 in case the size is + * exactly on a block boundary. + */ + last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1; + start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize); + end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1); + ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb); + if (last_fsb == end_zero_fsb) { + /* + * The size was only incremented on its last block. + * We took care of that above, so just return. + */ + return 0; + } + + ASSERT(start_zero_fsb <= end_zero_fsb); + while (start_zero_fsb <= end_zero_fsb) { + nimaps = 1; + zero_count_fsb = end_zero_fsb - start_zero_fsb + 1; + error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb, + 0, NULL, 0, &imap, &nimaps, NULL); + if (error) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); + return error; + } + ASSERT(nimaps > 0); + + if (imap.br_state == XFS_EXT_UNWRITTEN || + imap.br_startblock == HOLESTARTBLOCK) { + /* + * This loop handles initializing pages that were + * partially initialized by the code below this + * loop. It basically zeroes the part of the page + * that sits on a hole and sets the page as P_HOLE + * and calls remapf if it is a mapped file. + */ + start_zero_fsb = imap.br_startoff + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + continue; + } + + /* + * There are blocks we need to zero. + * Drop the inode lock while we're doing the I/O. + * We'll still have the iolock to protect us. + */ + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + zero_off = XFS_FSB_TO_B(mp, start_zero_fsb); + zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount); + + if ((zero_off + zero_len) > offset) + zero_len = offset - zero_off; + + error = xfs_iozero(ip, zero_off, zero_len); + if (error) { + goto out_lock; + } + + start_zero_fsb = imap.br_startoff + imap.br_blockcount; + ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); + + xfs_ilock(ip, XFS_ILOCK_EXCL); + } + + return 0; + +out_lock: + xfs_ilock(ip, XFS_ILOCK_EXCL); + ASSERT(error >= 0); + return error; +} + +/* + * Common pre-write limit and setup checks. + * + * Returns with iolock held according to @iolock. + */ +STATIC ssize_t +xfs_file_aio_write_checks( + struct file *file, + loff_t *pos, + size_t *count, + int *iolock) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + xfs_fsize_t new_size; + int error = 0; + + error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode)); + if (error) { + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock); + *iolock = 0; + return error; + } + + new_size = *pos + *count; + if (new_size > ip->i_size) + ip->i_new_size = new_size; + + if (likely(!(file->f_mode & FMODE_NOCMTIME))) + file_update_time(file); + + /* + * If the offset is beyond the size of the file, we need to zero any + * blocks that fall between the existing EOF and the start of this + * write. + */ + if (*pos > ip->i_size) + error = -xfs_zero_eof(ip, *pos, ip->i_size); + + xfs_rw_iunlock(ip, XFS_ILOCK_EXCL); + if (error) + return error; + + /* + * If we're writing the file then make sure to clear the setuid and + * setgid bits if the process is not being run by root. This keeps + * people from modifying setuid and setgid binaries. + */ + return file_remove_suid(file); + +} + +/* + * xfs_file_dio_aio_write - handle direct IO writes + * + * Lock the inode appropriately to prepare for and issue a direct IO write. + * By separating it from the buffered write path we remove all the tricky to + * follow locking changes and looping. + * + * If there are cached pages or we're extending the file, we need IOLOCK_EXCL + * until we're sure the bytes at the new EOF have been zeroed and/or the cached + * pages are flushed out. + * + * In most cases the direct IO writes will be done holding IOLOCK_SHARED + * allowing them to be done in parallel with reads and other direct IO writes. + * However, if the IO is not aligned to filesystem blocks, the direct IO layer + * needs to do sub-block zeroing and that requires serialisation against other + * direct IOs to the same block. In this case we need to serialise the + * submission of the unaligned IOs so that we don't get racing block zeroing in + * the dio layer. To avoid the problem with aio, we also need to wait for + * outstanding IOs to complete so that unwritten extent conversion is completed + * before we try to map the overlapping block. This is currently implemented by + * hitting it with a big hammer (i.e. xfs_ioend_wait()). + * + * Returns with locks held indicated by @iolock and errors indicated by + * negative return values. + */ +STATIC ssize_t +xfs_file_dio_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos, + size_t ocount, + int *iolock) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + ssize_t ret = 0; + size_t count = ocount; + int unaligned_io = 0; + struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + *iolock = 0; + if ((pos & target->bt_smask) || (count & target->bt_smask)) + return -XFS_ERROR(EINVAL); + + if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) + unaligned_io = 1; + + if (unaligned_io || mapping->nrpages || pos > ip->i_size) + *iolock = XFS_IOLOCK_EXCL; + else + *iolock = XFS_IOLOCK_SHARED; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) + return ret; + + if (mapping->nrpages) { + WARN_ON(*iolock != XFS_IOLOCK_EXCL); + ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1, + FI_REMAPF_LOCKED); + if (ret) + return ret; + } + + /* + * If we are doing unaligned IO, wait for all other IO to drain, + * otherwise demote the lock if we had to flush cached pages + */ + if (unaligned_io) + xfs_ioend_wait(ip); + else if (*iolock == XFS_IOLOCK_EXCL) { + xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL); + *iolock = XFS_IOLOCK_SHARED; + } + + trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_direct_write(iocb, iovp, + &nr_segs, pos, &iocb->ki_pos, count, ocount); + + /* No fallback to buffered IO on errors for XFS. */ + ASSERT(ret < 0 || ret == count); + return ret; +} + +STATIC ssize_t +xfs_file_buffered_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos, + size_t ocount, + int *iolock) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int enospc = 0; + size_t count = ocount; + + *iolock = XFS_IOLOCK_EXCL; + xfs_rw_ilock(ip, XFS_ILOCK_EXCL | *iolock); + + ret = xfs_file_aio_write_checks(file, &pos, &count, iolock); + if (ret) + return ret; + + /* We can write back this queue in page reclaim */ + current->backing_dev_info = mapping->backing_dev_info; + +write_retry: + trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); + ret = generic_file_buffered_write(iocb, iovp, nr_segs, + pos, &iocb->ki_pos, count, ret); + /* + * if we just got an ENOSPC, flush the inode now we aren't holding any + * page locks and retry *once* + */ + if (ret == -ENOSPC && !enospc) { + ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE); + if (ret) + return ret; + enospc = 1; + goto write_retry; + } + current->backing_dev_info = NULL; + return ret; +} + +STATIC ssize_t +xfs_file_aio_write( + struct kiocb *iocb, + const struct iovec *iovp, + unsigned long nr_segs, + loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct xfs_inode *ip = XFS_I(inode); + ssize_t ret; + int iolock; + size_t ocount = 0; + + XFS_STATS_INC(xs_write_calls); + + BUG_ON(iocb->ki_pos != pos); + + ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); + if (ret) + return ret; + + if (ocount == 0) + return 0; + + xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE); + + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; + + if (unlikely(file->f_flags & O_DIRECT)) + ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + else + ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, + ocount, &iolock); + + xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret); + + if (ret <= 0) + goto out_unlock; + + /* Handle various SYNC-type writes */ + if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { + loff_t end = pos + ret - 1; + int error; + + xfs_rw_iunlock(ip, iolock); + error = xfs_file_fsync(file, pos, end, + (file->f_flags & __O_SYNC) ? 0 : 1); + xfs_rw_ilock(ip, iolock); + if (error) + ret = error; + } + +out_unlock: + xfs_aio_write_newsize_update(ip); + xfs_rw_iunlock(ip, iolock); + return ret; +} + +STATIC long +xfs_file_fallocate( + struct file *file, + int mode, + loff_t offset, + loff_t len) +{ + struct inode *inode = file->f_path.dentry->d_inode; + long error; + loff_t new_size = 0; + xfs_flock64_t bf; + xfs_inode_t *ip = XFS_I(inode); + int cmd = XFS_IOC_RESVSP; + int attr_flags = XFS_ATTR_NOLOCK; + + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + return -EOPNOTSUPP; + + bf.l_whence = 0; + bf.l_start = offset; + bf.l_len = len; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + if (mode & FALLOC_FL_PUNCH_HOLE) + cmd = XFS_IOC_UNRESVSP; + + /* check the new inode size is valid before allocating */ + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } + + if (file->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + + error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); + if (error) + goto out_unlock; + + /* Change file size if needed */ + if (new_size) { + struct iattr iattr; + + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + } + +out_unlock: + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + return error; +} + + +STATIC int +xfs_file_open( + struct inode *inode, + struct file *file) +{ + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + return -EFBIG; + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. + */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; +} + +STATIC int +xfs_file_release( + struct inode *inode, + struct file *filp) +{ + return -xfs_release(XFS_I(inode)); +} + +STATIC int +xfs_file_readdir( + struct file *filp, + void *dirent, + filldir_t filldir) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + xfs_inode_t *ip = XFS_I(inode); + int error; + size_t bufsize; + + /* + * The Linux API doesn't pass down the total size of the buffer + * we read into down to the filesystem. With the filldir concept + * it's not needed for correct information, but the XFS dir2 leaf + * code wants an estimate of the buffer size to calculate it's + * readahead window and size the buffers used for mapping to + * physical blocks. + * + * Try to give it an estimate that's good enough, maybe at some + * point we can change the ->readdir prototype to include the + * buffer size. For now we use the current glibc buffer size. + */ + bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); + + error = xfs_readdir(ip, dirent, bufsize, + (xfs_off_t *)&filp->f_pos, filldir); + if (error) + return -error; + return 0; +} + +STATIC int +xfs_file_mmap( + struct file *filp, + struct vm_area_struct *vma) +{ + vma->vm_ops = &xfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + + file_accessed(filp); + return 0; +} + +/* + * mmap()d file has taken write protection fault and is being made + * writable. We can set the page state up correctly for a writable + * page, which means we can do correct delalloc accounting (ENOSPC + * checking!) and unwritten extent mapping. + */ +STATIC int +xfs_vm_page_mkwrite( + struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + return block_page_mkwrite(vma, vmf, xfs_get_blocks); +} + +const struct file_operations xfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = xfs_file_aio_read, + .aio_write = xfs_file_aio_write, + .splice_read = xfs_file_splice_read, + .splice_write = xfs_file_splice_write, + .unlocked_ioctl = xfs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_file_compat_ioctl, +#endif + .mmap = xfs_file_mmap, + .open = xfs_file_open, + .release = xfs_file_release, + .fsync = xfs_file_fsync, + .fallocate = xfs_file_fallocate, +}; + +const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, + .read = generic_read_dir, + .readdir = xfs_file_readdir, + .llseek = generic_file_llseek, + .unlocked_ioctl = xfs_file_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = xfs_file_compat_ioctl, +#endif + .fsync = xfs_file_fsync, +}; + +static const struct vm_operations_struct xfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = xfs_vm_page_mkwrite, +}; diff --git a/fs/xfs/xfs_fs_subr.c b/fs/xfs/xfs_fs_subr.c new file mode 100644 index 000000000000..ed88ed16811c --- /dev/null +++ b/fs/xfs/xfs_fs_subr.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_vnodeops.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_trace.h" + +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. + */ +void +xfs_tosspages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + /* can't toss partial tail pages, so mask them out */ + last &= ~(PAGE_SIZE - 1); + truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); +} + +int +xfs_flushinval_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + int fiopt) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + int ret = 0; + + trace_xfs_pagecache_inval(ip, first, last); + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = filemap_write_and_wait_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (!ret) + truncate_inode_pages_range(mapping, first, last); + return -ret; +} + +int +xfs_flush_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last, + uint64_t flags, + int fiopt) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + int ret = 0; + int ret2; + + xfs_iflags_clear(ip, XFS_ITRUNCATED); + ret = -filemap_fdatawrite_range(mapping, first, + last == -1 ? LLONG_MAX : last); + if (flags & XBF_ASYNC) + return ret; + ret2 = xfs_wait_on_pages(ip, first, last); + if (!ret) + ret = ret2; + return ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { + return -filemap_fdatawait_range(mapping, first, + last == -1 ? ip->i_size - 1 : last); + } + return 0; +} diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c new file mode 100644 index 000000000000..76e81cff70b9 --- /dev/null +++ b/fs/xfs/xfs_globals.c @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sysctl.h" + +/* + * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, + * other XFS code uses these values. Times are measured in centisecs (i.e. + * 100ths of a second). + */ +xfs_param_t xfs_params = { + /* MIN DFLT MAX */ + .sgid_inherit = { 0, 0, 1 }, + .symlink_mode = { 0, 0, 1 }, + .panic_mask = { 0, 0, 255 }, + .error_level = { 0, 3, 11 }, + .syncd_timer = { 1*100, 30*100, 7200*100}, + .stats_clear = { 0, 0, 1 }, + .inherit_sync = { 0, 1, 1 }, + .inherit_nodump = { 0, 1, 1 }, + .inherit_noatim = { 0, 1, 1 }, + .xfs_buf_timer = { 100/2, 1*100, 30*100 }, + .xfs_buf_age = { 1*100, 15*100, 7200*100}, + .inherit_nosym = { 0, 0, 1 }, + .rotorstep = { 1, 1, 255 }, + .inherit_nodfrg = { 0, 1, 1 }, + .fstrm_timer = { 1, 30*100, 3600*100}, +}; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c new file mode 100644 index 000000000000..f7ce7debe14c --- /dev/null +++ b/fs/xfs/xfs_ioctl.c @@ -0,0 +1,1556 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ioctl.h" +#include "xfs_rtalloc.h" +#include "xfs_itable.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_bmap.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_dfrag.h" +#include "xfs_fsops.h" +#include "xfs_vnodeops.h" +#include "xfs_discard.h" +#include "xfs_quota.h" +#include "xfs_inode_item.h" +#include "xfs_export.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include + +/* + * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to + * a file or fs handle. + * + * XFS_IOC_PATH_TO_FSHANDLE + * returns fs handle for a mount point or path within that mount point + * XFS_IOC_FD_TO_HANDLE + * returns full handle for a FD opened in user space + * XFS_IOC_PATH_TO_HANDLE + * returns full handle for a path + */ +int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq) +{ + int hsize; + xfs_handle_t handle; + struct inode *inode; + struct file *file = NULL; + struct path path; + int error; + struct xfs_inode *ip; + + if (cmd == XFS_IOC_FD_TO_HANDLE) { + file = fget(hreq->fd); + if (!file) + return -EBADF; + inode = file->f_path.dentry->d_inode; + } else { + error = user_lpath((const char __user *)hreq->path, &path); + if (error) + return error; + inode = path.dentry->d_inode; + } + ip = XFS_I(inode); + + /* + * We can only generate handles for inodes residing on a XFS filesystem, + * and only for regular files, directories or symbolic links. + */ + error = -EINVAL; + if (inode->i_sb->s_magic != XFS_SB_MAGIC) + goto out_put; + + error = -EBADF; + if (!S_ISREG(inode->i_mode) && + !S_ISDIR(inode->i_mode) && + !S_ISLNK(inode->i_mode)) + goto out_put; + + + memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t)); + + if (cmd == XFS_IOC_PATH_TO_FSHANDLE) { + /* + * This handle only contains an fsid, zero the rest. + */ + memset(&handle.ha_fid, 0, sizeof(handle.ha_fid)); + hsize = sizeof(xfs_fsid_t); + } else { + int lock_mode; + + lock_mode = xfs_ilock_map_shared(ip); + handle.ha_fid.fid_len = sizeof(xfs_fid_t) - + sizeof(handle.ha_fid.fid_len); + handle.ha_fid.fid_pad = 0; + handle.ha_fid.fid_gen = ip->i_d.di_gen; + handle.ha_fid.fid_ino = ip->i_ino; + xfs_iunlock_map_shared(ip, lock_mode); + + hsize = XFS_HSIZE(handle); + } + + error = -EFAULT; + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) + goto out_put; + + error = 0; + + out_put: + if (cmd == XFS_IOC_FD_TO_HANDLE) + fput(file); + else + path_put(&path); + return error; +} + +/* + * No need to do permission checks on the various pathname components + * as the handle operations are privileged. + */ +STATIC int +xfs_handle_acceptable( + void *context, + struct dentry *dentry) +{ + return 1; +} + +/* + * Convert userspace handle data into a dentry. + */ +struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen) +{ + xfs_handle_t handle; + struct xfs_fid64 fid; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode)) + return ERR_PTR(-ENOTDIR); + + if (hlen != sizeof(xfs_handle_t)) + return ERR_PTR(-EINVAL); + if (copy_from_user(&handle, uhandle, hlen)) + return ERR_PTR(-EFAULT); + if (handle.ha_fid.fid_len != + sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len)) + return ERR_PTR(-EINVAL); + + memset(&fid, 0, sizeof(struct fid)); + fid.ino = handle.ha_fid.fid_ino; + fid.gen = handle.ha_fid.fid_gen; + + return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3, + FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG, + xfs_handle_acceptable, NULL); +} + +STATIC struct dentry * +xfs_handlereq_to_dentry( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen); +} + +int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + const struct cred *cred = current_cred(); + int error; + int fd; + int permflag; + struct file *filp; + struct inode *inode; + struct dentry *dentry; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + inode = dentry->d_inode; + + /* Restrict xfs_open_by_handle to directories & regular files. */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) { + error = -XFS_ERROR(EPERM); + goto out_dput; + } + +#if BITS_PER_LONG != 32 + hreq->oflags |= O_LARGEFILE; +#endif + + /* Put open permission in namei format. */ + permflag = hreq->oflags; + if ((permflag+1) & O_ACCMODE) + permflag++; + if (permflag & O_TRUNC) + permflag |= 2; + + if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) && + (permflag & FMODE_WRITE) && IS_APPEND(inode)) { + error = -XFS_ERROR(EPERM); + goto out_dput; + } + + if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) { + error = -XFS_ERROR(EACCES); + goto out_dput; + } + + /* Can't write directories. */ + if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) { + error = -XFS_ERROR(EISDIR); + goto out_dput; + } + + fd = get_unused_fd(); + if (fd < 0) { + error = fd; + goto out_dput; + } + + filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), + hreq->oflags, cred); + if (IS_ERR(filp)) { + put_unused_fd(fd); + return PTR_ERR(filp); + } + + if (S_ISREG(inode->i_mode)) { + filp->f_flags |= O_NOATIME; + filp->f_mode |= FMODE_NOCMTIME; + } + + fd_install(fd, filp); + return fd; + + out_dput: + dput(dentry); + return error; +} + +/* + * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's + * unused first argument. + */ +STATIC int +do_readlink( + char __user *buffer, + int buflen, + const char *link) +{ + int len; + + len = PTR_ERR(link); + if (IS_ERR(link)) + goto out; + + len = strlen(link); + if (len > (unsigned) buflen) + len = buflen; + if (copy_to_user(buffer, link, len)) + len = -EFAULT; + out: + return len; +} + + +int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq) +{ + struct dentry *dentry; + __u32 olen; + void *link; + int error; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + dentry = xfs_handlereq_to_dentry(parfilp, hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + /* Restrict this handle operation to symlinks only. */ + if (!S_ISLNK(dentry->d_inode->i_mode)) { + error = -XFS_ERROR(EINVAL); + goto out_dput; + } + + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { + error = -XFS_ERROR(EFAULT); + goto out_dput; + } + + link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); + if (!link) { + error = -XFS_ERROR(ENOMEM); + goto out_dput; + } + + error = -xfs_readlink(XFS_I(dentry->d_inode), link); + if (error) + goto out_kfree; + error = do_readlink(hreq->ohandle, olen, link); + if (error) + goto out_kfree; + + out_kfree: + kfree(link); + out_dput: + dput(dentry); + return error; +} + +STATIC int +xfs_fssetdm_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + struct fsdmidata fsd; + xfs_fsop_setdm_handlereq_t dmhreq; + struct dentry *dentry; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + + out: + dput(dentry); + return error; +} + +STATIC int +xfs_attrlist_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error = -ENOMEM; + attrlist_cursor_kern_t *cursor; + xfs_fsop_attrlist_handlereq_t al_hreq; + struct dentry *dentry; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_dput; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_dput: + dput(dentry); + return error; +} + +int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + __uint32_t *len, + __uint32_t flags) +{ + unsigned char *kbuf; + int error = EFAULT; + + if (*len > XATTR_SIZE_MAX) + return EINVAL; + kbuf = kmalloc(*len, GFP_KERNEL); + if (!kbuf) + return ENOMEM; + + error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags); + if (error) + goto out_kfree; + + if (copy_to_user(ubuf, kbuf, *len)) + error = EFAULT; + + out_kfree: + kfree(kbuf); + return error; +} + +int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + __uint32_t len, + __uint32_t flags) +{ + unsigned char *kbuf; + int error = EFAULT; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return EPERM; + if (len > XATTR_SIZE_MAX) + return EINVAL; + + kbuf = memdup_user(ubuf, len); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); + + return error; +} + +int +xfs_attrmulti_attr_remove( + struct inode *inode, + unsigned char *name, + __uint32_t flags) +{ + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return EPERM; + return xfs_attr_remove(XFS_I(inode), name, flags); +} + +STATIC int +xfs_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + xfs_attr_multiop_t *ops; + xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; + unsigned int i, size; + unsigned char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = E2BIG; + size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(am_hreq.ops, size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user((char *)attr_name, + ops[i].am_attrname, MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, &ops[i].am_length, + ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, + ops[i].am_attrvalue, ops[i].am_length, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(am_hreq.ops, ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_dput: + dput(dentry); + return -error; +} + +int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf) +{ + int attr_flags = 0; + int error; + + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) + return -XFS_ERROR(EPERM); + + if (!(filp->f_mode & FMODE_WRITE)) + return -XFS_ERROR(EBADF); + + if (!S_ISREG(inode->i_mode)) + return -XFS_ERROR(EINVAL); + + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + attr_flags |= XFS_ATTR_NONBLOCK; + + if (filp->f_flags & O_DSYNC) + attr_flags |= XFS_ATTR_SYNC; + + if (ioflags & IO_INVIS) + attr_flags |= XFS_ATTR_DMI; + + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); + return -error; +} + +STATIC int +xfs_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + void __user *arg) +{ + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) + return -XFS_ERROR(EFAULT); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (bulkreq.ubuffer == NULL) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS) + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer, xfs_inumbers_fmt); + else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) + error = xfs_bulkstat_single(mp, &inlast, + bulkreq.ubuffer, &done); + else /* XFS_IOC_FSBULKSTAT */ + error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, + sizeof(xfs_bstat_t), bulkreq.ubuffer, + &done); + + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_ioc_fsgeometry_v1( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + + /* + * Caller should have passed an argument of type + * xfs_fsop_geom_v1_t. This is a proper subset of the + * xfs_fsop_geom_t that xfs_fs_geometry() fills in. + */ + if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_ioc_fsgeometry( + xfs_mount_t *mp, + void __user *arg) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 4); + if (error) + return -error; + + if (copy_to_user(arg, &fsgeo, sizeof(fsgeo))) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* + * Linux extended inode flags interface. + */ + +STATIC unsigned int +xfs_merge_ioc_xflags( + unsigned int flags, + unsigned int start) +{ + unsigned int xflags = start; + + if (flags & FS_IMMUTABLE_FL) + xflags |= XFS_XFLAG_IMMUTABLE; + else + xflags &= ~XFS_XFLAG_IMMUTABLE; + if (flags & FS_APPEND_FL) + xflags |= XFS_XFLAG_APPEND; + else + xflags &= ~XFS_XFLAG_APPEND; + if (flags & FS_SYNC_FL) + xflags |= XFS_XFLAG_SYNC; + else + xflags &= ~XFS_XFLAG_SYNC; + if (flags & FS_NOATIME_FL) + xflags |= XFS_XFLAG_NOATIME; + else + xflags &= ~XFS_XFLAG_NOATIME; + if (flags & FS_NODUMP_FL) + xflags |= XFS_XFLAG_NODUMP; + else + xflags &= ~XFS_XFLAG_NODUMP; + + return xflags; +} + +STATIC unsigned int +xfs_di2lxflags( + __uint16_t di_flags) +{ + unsigned int flags = 0; + + if (di_flags & XFS_DIFLAG_IMMUTABLE) + flags |= FS_IMMUTABLE_FL; + if (di_flags & XFS_DIFLAG_APPEND) + flags |= FS_APPEND_FL; + if (di_flags & XFS_DIFLAG_SYNC) + flags |= FS_SYNC_FL; + if (di_flags & XFS_DIFLAG_NOATIME) + flags |= FS_NOATIME_FL; + if (di_flags & XFS_DIFLAG_NODUMP) + flags |= FS_NODUMP_FL; + return flags; +} + +STATIC int +xfs_ioc_fsgetxattr( + xfs_inode_t *ip, + int attr, + void __user *arg) +{ + struct fsxattr fa; + + memset(&fa, 0, sizeof(struct fsxattr)); + + xfs_ilock(ip, XFS_ILOCK_SHARED); + fa.fsx_xflags = xfs_ip2xflags(ip); + fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; + fa.fsx_projid = xfs_get_projid(ip); + + if (attr) { + if (ip->i_afp) { + if (ip->i_afp->if_flags & XFS_IFEXTENTS) + fa.fsx_nextents = ip->i_afp->if_bytes / + sizeof(xfs_bmbt_rec_t); + else + fa.fsx_nextents = ip->i_d.di_anextents; + } else + fa.fsx_nextents = 0; + } else { + if (ip->i_df.if_flags & XFS_IFEXTENTS) + fa.fsx_nextents = ip->i_df.if_bytes / + sizeof(xfs_bmbt_rec_t); + else + fa.fsx_nextents = ip->i_d.di_nextents; + } + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + if (copy_to_user(arg, &fa, sizeof(fa))) + return -EFAULT; + return 0; +} + +STATIC void +xfs_set_diflags( + struct xfs_inode *ip, + unsigned int xflags) +{ + unsigned int di_flags; + + /* can't set PREALLOC this way, just preserve it */ + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & XFS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & XFS_XFLAG_SYNC) + di_flags |= XFS_DIFLAG_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + di_flags |= XFS_DIFLAG_NOATIME; + if (xflags & XFS_XFLAG_NODUMP) + di_flags |= XFS_DIFLAG_NODUMP; + if (xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + if (xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + if (S_ISDIR(ip->i_d.di_mode)) { + if (xflags & XFS_XFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (xflags & XFS_XFLAG_NOSYMLINKS) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (xflags & XFS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + } else if (S_ISREG(ip->i_d.di_mode)) { + if (xflags & XFS_XFLAG_REALTIME) + di_flags |= XFS_DIFLAG_REALTIME; + if (xflags & XFS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; + } + + ip->i_d.di_flags = di_flags; +} + +STATIC void +xfs_diflags_to_linux( + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + unsigned int xflags = xfs_ip2xflags(ip); + + if (xflags & XFS_XFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (xflags & XFS_XFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +#define FSX_PROJID 1 +#define FSX_EXTSIZE 2 +#define FSX_XFLAGS 4 +#define FSX_NONBLOCK 8 + +STATIC int +xfs_ioctl_setattr( + xfs_inode_t *ip, + struct fsxattr *fa, + int mask) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + unsigned int lock_flags = 0; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *olddquot = NULL; + int code; + + trace_xfs_ioctl_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + /* + * Disallow 32bit project ids when projid32bit feature is not enabled. + */ + if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && + !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) + return XFS_ERROR(EINVAL); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { + code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid, + ip->i_d.di_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, &udqp, &gdqp); + if (code) + return code; + } + + /* + * For the other attributes, we acquire the inode lock and + * first do an error checking pass. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (code) + goto error_return; + + lock_flags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * CAP_FOWNER overrides the following restrictions: + * + * The user ID of the calling process must be equal + * to the file owner ID, except in cases where the + * CAP_FSETID capability is applicable. + */ + if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + + /* + * Do a quota reservation only if projid is actually going to change. + */ + if (mask & FSX_PROJID) { + if (XFS_IS_QUOTA_RUNNING(mp) && + XFS_IS_PQUOTA_ON(mp) && + xfs_get_projid(ip) != fa->fsx_projid) { + ASSERT(tp); + code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_return; + } + } + + if (mask & FSX_EXTSIZE) { + /* + * Can't change extent size if any extents are allocated. + */ + if (ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != + fa->fsx_extsize)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * Extent size must be a multiple of the appropriate block + * size, if set at all. It must also be smaller than the + * maximum extent size supported by the filesystem. + * + * Also, for non-realtime files, limit the extent size hint to + * half the size of the AGs in the filesystem so alignment + * doesn't result in extents larger than an AG. + */ + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + xfs_fsblock_t extsize_fsb; + + extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize); + if (extsize_fsb > MAXEXTLEN) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + + if (XFS_IS_REALTIME_INODE(ip) || + ((mask & FSX_XFLAGS) && + (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { + size = mp->m_sb.sb_rextsize << + mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + if (extsize_fsb > mp->m_sb.sb_agblocks / 2) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + + if (fa->fsx_extsize % size) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + } + + + if (mask & FSX_XFLAGS) { + /* + * Can't change realtime flag if any extents are allocated. + */ + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + (XFS_IS_REALTIME_INODE(ip)) != + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * If realtime flag is set then must have realtime data. + */ + if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + if ((mp->m_sb.sb_rblocks == 0) || + (mp->m_sb.sb_rextsize == 0) || + (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + + /* + * Can't modify an immutable/append-only file unless + * we have appropriate permission. + */ + if ((ip->i_d.di_flags & + (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || + (fa->fsx_xflags & + (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + !capable(CAP_LINUX_IMMUTABLE)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + } + + xfs_trans_ijoin(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & FSX_PROJID) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (xfs_get_projid(ip) != fa->fsx_projid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { + olddquot = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + xfs_set_projid(ip, fa->fsx_projid); + + /* + * We may have to rev the inode as well as + * the superblock version number since projids didn't + * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. + */ + if (ip->i_d.di_version == 1) + xfs_bump_ino_vers2(tp, ip); + } + + } + + if (mask & FSX_EXTSIZE) + ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + if (mask & FSX_XFLAGS) { + xfs_set_diflags(ip, fa->fsx_xflags); + xfs_diflags_to_linux(ip); + } + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + * This is slightly sub-optimal in that truncates require + * two sync transactions instead of one for wsync filesystems. + * One for the truncate and one for the timestamps since we + * don't want to change the timestamps unless we're sure the + * truncate worked. Truncates are less than 1% of the laddis + * mix so this probably isn't worth the trouble to optimize. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + code = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, lock_flags); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + return code; + + error_return: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_trans_cancel(tp, 0); + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return code; +} + +STATIC int +xfs_ioc_fssetxattr( + xfs_inode_t *ip, + struct file *filp, + void __user *arg) +{ + struct fsxattr fa; + unsigned int mask; + + if (copy_from_user(&fa, arg, sizeof(fa))) + return -EFAULT; + + mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + mask |= FSX_NONBLOCK; + + return -xfs_ioctl_setattr(ip, &fa, mask); +} + +STATIC int +xfs_ioc_getxflags( + xfs_inode_t *ip, + void __user *arg) +{ + unsigned int flags; + + flags = xfs_di2lxflags(ip->i_d.di_flags); + if (copy_to_user(arg, &flags, sizeof(flags))) + return -EFAULT; + return 0; +} + +STATIC int +xfs_ioc_setxflags( + xfs_inode_t *ip, + struct file *filp, + void __user *arg) +{ + struct fsxattr fa; + unsigned int flags; + unsigned int mask; + + if (copy_from_user(&flags, arg, sizeof(flags))) + return -EFAULT; + + if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ + FS_NOATIME_FL | FS_NODUMP_FL | \ + FS_SYNC_FL)) + return -EOPNOTSUPP; + + mask = FSX_XFLAGS; + if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) + mask |= FSX_NONBLOCK; + fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); + + return -xfs_ioctl_setattr(ip, &fa, mask); +} + +STATIC int +xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmap __user *base = *ap; + + /* copy only getbmap portion (not getbmapx) */ + if (copy_to_user(base, bmv, sizeof(struct getbmap))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmap); + return 0; +} + +STATIC int +xfs_ioc_getbmap( + struct xfs_inode *ip, + int ioflags, + unsigned int cmd, + void __user *arg) +{ + struct getbmapx bmx; + int error; + + if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + if (ioflags & IO_INVIS) + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; + + error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, + (struct getbmap *)arg+1); + if (error) + return -error; + + /* copy back header - only size of getbmap */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmapx __user *base = *ap; + + if (copy_to_user(base, bmv, sizeof(struct getbmapx))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmapx); + return 0; +} + +STATIC int +xfs_ioc_getbmapx( + struct xfs_inode *ip, + void __user *arg) +{ + struct getbmapx bmx; + int error; + + if (copy_from_user(&bmx, arg, sizeof(bmx))) + return -XFS_ERROR(EFAULT); + + if (bmx.bmv_count < 2) + return -XFS_ERROR(EINVAL); + + if (bmx.bmv_iflags & (~BMV_IF_VALID)) + return -XFS_ERROR(EINVAL); + + error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, + (struct getbmapx *)arg+1); + if (error) + return -error; + + /* copy back header */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) + return -XFS_ERROR(EFAULT); + + return 0; +} + +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. + */ +long +xfs_file_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long p) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + trace_xfs_file_ioctl(ip); + + switch (cmd) { + case FITRIM: + return xfs_ioc_trim(mp, arg); + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_ZERO_RANGE: { + xfs_flock64_t bf; + + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } + case XFS_IOC_DIOINFO: { + struct dioattr da; + xfs_buftarg_t *target = + XFS_IS_REALTIME_INODE(ip) ? + mp->m_rtdev_targp : mp->m_ddev_targp; + + da.d_mem = da.d_miniosz = 1 << target->bt_sshift; + da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1); + + if (copy_to_user(arg, &da, sizeof(da))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_FSBULKSTAT_SINGLE: + case XFS_IOC_FSBULKSTAT: + case XFS_IOC_FSINUMBERS: + return xfs_ioc_bulkstat(mp, cmd, arg); + + case XFS_IOC_FSGEOMETRY_V1: + return xfs_ioc_fsgeometry_v1(mp, arg); + + case XFS_IOC_FSGEOMETRY: + return xfs_ioc_fsgeometry(mp, arg); + + case XFS_IOC_GETVERSION: + return put_user(inode->i_generation, (int __user *)arg); + + case XFS_IOC_FSGETXATTR: + return xfs_ioc_fsgetxattr(ip, 0, arg); + case XFS_IOC_FSGETXATTRA: + return xfs_ioc_fsgetxattr(ip, 1, arg); + case XFS_IOC_FSSETXATTR: + return xfs_ioc_fssetxattr(ip, filp, arg); + case XFS_IOC_GETXFLAGS: + return xfs_ioc_getxflags(ip, arg); + case XFS_IOC_SETXFLAGS: + return xfs_ioc_setxflags(ip, filp, arg); + + case XFS_IOC_FSSETDM: { + struct fsdmidata dmi; + + if (copy_from_user(&dmi, arg, sizeof(dmi))) + return -XFS_ERROR(EFAULT); + + error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask, + dmi.fsd_dmstate); + return -error; + } + + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + return xfs_ioc_getbmap(ip, ioflags, cmd, arg); + + case XFS_IOC_GETBMAPX: + return xfs_ioc_getbmapx(ip, arg); + + case XFS_IOC_FD_TO_HANDLE: + case XFS_IOC_PATH_TO_HANDLE: + case XFS_IOC_PATH_TO_FSHANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(filp, &hreq); + } + case XFS_IOC_FSSETDM_BY_HANDLE: + return xfs_fssetdm_by_handle(filp, arg); + + case XFS_IOC_READLINK_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(filp, &hreq); + } + case XFS_IOC_ATTRLIST_BY_HANDLE: + return xfs_attrlist_by_handle(filp, arg); + + case XFS_IOC_ATTRMULTI_BY_HANDLE: + return xfs_attrmulti_by_handle(filp, arg); + + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + + if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } + + case XFS_IOC_FSCOUNTS: { + xfs_fsop_counts_t out; + + error = xfs_fs_counts(mp, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_SET_RESBLKS: { + xfs_fsop_resblks_t inout; + __uint64_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + + if (copy_from_user(&inout, arg, sizeof(inout))) + return -XFS_ERROR(EFAULT); + + /* input parameter is passed in resblks field of structure */ + in = inout.resblks; + error = xfs_reserve_blocks(mp, &in, &inout); + if (error) + return -error; + + if (copy_to_user(arg, &inout, sizeof(inout))) + return -XFS_ERROR(EFAULT); + return 0; + } + + case XFS_IOC_GET_RESBLKS: { + xfs_fsop_resblks_t out; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_reserve_blocks(mp, NULL, &out); + if (error) + return -error; + + if (copy_to_user(arg, &out, sizeof(out))) + return -XFS_ERROR(EFAULT); + + return 0; + } + + case XFS_IOC_FSGROWFSDATA: { + xfs_growfs_data_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_data(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSLOG: { + xfs_growfs_log_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_log(mp, &in); + return -error; + } + + case XFS_IOC_FSGROWFSRT: { + xfs_growfs_rt_t in; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_growfs_rt(mp, &in); + return -error; + } + + case XFS_IOC_GOINGDOWN: { + __uint32_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (get_user(in, (__uint32_t __user *)arg)) + return -XFS_ERROR(EFAULT); + + error = xfs_fs_goingdown(mp, in); + return -error; + } + + case XFS_IOC_ERROR_INJECTION: { + xfs_error_injection_t in; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&in, arg, sizeof(in))) + return -XFS_ERROR(EFAULT); + + error = xfs_errortag_add(in.errtag, mp); + return -error; + } + + case XFS_IOC_ERROR_CLEARALL: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + error = xfs_errortag_clearall(mp, 1); + return -error; + + default: + return -ENOTTY; + } +} diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h new file mode 100644 index 000000000000..d56173b34a2a --- /dev/null +++ b/fs/xfs/xfs_ioctl.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL_H__ +#define __XFS_IOCTL_H__ + +extern int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf); + +extern int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_open_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_readlink_by_handle( + struct file *parfilp, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_attrmulti_attr_get( + struct inode *inode, + unsigned char *name, + unsigned char __user *ubuf, + __uint32_t *len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_set( + struct inode *inode, + unsigned char *name, + const unsigned char __user *ubuf, + __uint32_t len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_remove( + struct inode *inode, + unsigned char *name, + __uint32_t flags); + +extern struct dentry * +xfs_handle_to_dentry( + struct file *parfilp, + void __user *uhandle, + u32 hlen); + +extern long +xfs_file_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long p); + +extern long +xfs_file_compat_ioctl( + struct file *file, + unsigned int cmd, + unsigned long arg); + +#endif diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c new file mode 100644 index 000000000000..54e623bfbb85 --- /dev/null +++ b/fs/xfs/xfs_ioctl32.c @@ -0,0 +1,672 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include +#include +#include +#include +#include +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_vnode.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_error.h" +#include "xfs_dfrag.h" +#include "xfs_vnodeops.h" +#include "xfs_fsops.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_attr.h" +#include "xfs_ioctl.h" +#include "xfs_ioctl32.h" +#include "xfs_trace.h" + +#define _NATIVE_IOC(cmd, type) \ + _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) + +#ifdef BROKEN_X86_ALIGNMENT +STATIC int +xfs_compat_flock64_copyin( + xfs_flock64_t *bf, + compat_xfs_flock64_t __user *arg32) +{ + if (get_user(bf->l_type, &arg32->l_type) || + get_user(bf->l_whence, &arg32->l_whence) || + get_user(bf->l_start, &arg32->l_start) || + get_user(bf->l_len, &arg32->l_len) || + get_user(bf->l_sysid, &arg32->l_sysid) || + get_user(bf->l_pid, &arg32->l_pid) || + copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_ioc_fsgeometry_v1( + struct xfs_mount *mp, + compat_xfs_fsop_geom_v1_t __user *arg32) +{ + xfs_fsop_geom_t fsgeo; + int error; + + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + /* The 32-bit variant simply has some padding at the end */ + if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_data_copyin( + struct xfs_growfs_data *in, + compat_xfs_growfs_data_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->imaxpct, &arg32->imaxpct)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_rt_copyin( + struct xfs_growfs_rt *in, + compat_xfs_growfs_rt_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->extsize, &arg32->extsize)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) +{ + compat_xfs_inogrp_t __user *p32 = ubuffer; + long i; + + for (i = 0; i < count; i++) { + if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || + put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || + put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) + return -XFS_ERROR(EFAULT); + } + *written = count * sizeof(*p32); + return 0; +} + +#else +#define xfs_inumbers_fmt_compat xfs_inumbers_fmt +#endif /* BROKEN_X86_ALIGNMENT */ + +STATIC int +xfs_ioctl32_bstime_copyin( + xfs_bstime_t *bstime, + compat_xfs_bstime_t __user *bstime32) +{ + compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ + + if (get_user(sec32, &bstime32->tv_sec) || + get_user(bstime->tv_nsec, &bstime32->tv_nsec)) + return -XFS_ERROR(EFAULT); + bstime->tv_sec = sec32; + return 0; +} + +/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +STATIC int +xfs_ioctl32_bstat_copyin( + xfs_bstat_t *bstat, + compat_xfs_bstat_t __user *bstat32) +{ + if (get_user(bstat->bs_ino, &bstat32->bs_ino) || + get_user(bstat->bs_mode, &bstat32->bs_mode) || + get_user(bstat->bs_nlink, &bstat32->bs_nlink) || + get_user(bstat->bs_uid, &bstat32->bs_uid) || + get_user(bstat->bs_gid, &bstat32->bs_gid) || + get_user(bstat->bs_rdev, &bstat32->bs_rdev) || + get_user(bstat->bs_blksize, &bstat32->bs_blksize) || + get_user(bstat->bs_size, &bstat32->bs_size) || + xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || + get_user(bstat->bs_blocks, &bstat32->bs_size) || + get_user(bstat->bs_xflags, &bstat32->bs_size) || + get_user(bstat->bs_extsize, &bstat32->bs_extsize) || + get_user(bstat->bs_extents, &bstat32->bs_extents) || + get_user(bstat->bs_gen, &bstat32->bs_gen) || + get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || + get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || + get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || + get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || + get_user(bstat->bs_aextents, &bstat32->bs_aextents)) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* XFS_IOC_FSBULKSTAT and friends */ + +STATIC int +xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) +{ + __s32 sec32; + + sec32 = p->tv_sec; + if (put_user(sec32, &p32->tv_sec) || + put_user(p->tv_nsec, &p32->tv_nsec)) + return -XFS_ERROR(EFAULT); + return 0; +} + +/* Return 0 on success or positive error (to xfs_bulkstat()) */ +STATIC int +xfs_bulkstat_one_fmt_compat( + void __user *ubuffer, + int ubsize, + int *ubused, + const xfs_bstat_t *buffer) +{ + compat_xfs_bstat_t __user *p32 = ubuffer; + + if (ubsize < sizeof(*p32)) + return XFS_ERROR(ENOMEM); + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || + xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || + xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || + xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_aextents, &p32->bs_aextents)) + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*p32); + return 0; +} + +STATIC int +xfs_bulkstat_one_compat( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + int *ubused, /* bytes used by me */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt_compat, + ubused, stat); +} + +/* copied from xfs_ioctl.c */ +STATIC int +xfs_compat_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + compat_xfs_fsop_bulkreq_t __user *p32) +{ + u32 addr; + xfs_fsop_bulkreq_t bulkreq; + int count; /* # of records returned */ + xfs_ino_t inlast; /* last inode number */ + int done; + int error; + + /* done = 1 if there are more stats to get and if bulkstat */ + /* should be called again (unused here, but used in dmapi) */ + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + + if (XFS_FORCED_SHUTDOWN(mp)) + return -XFS_ERROR(EIO); + + if (get_user(addr, &p32->lastip)) + return -XFS_ERROR(EFAULT); + bulkreq.lastip = compat_ptr(addr); + if (get_user(bulkreq.icount, &p32->icount) || + get_user(addr, &p32->ubuffer)) + return -XFS_ERROR(EFAULT); + bulkreq.ubuffer = compat_ptr(addr); + if (get_user(addr, &p32->ocount)) + return -XFS_ERROR(EFAULT); + bulkreq.ocount = compat_ptr(addr); + + if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) + return -XFS_ERROR(EFAULT); + + if ((count = bulkreq.icount) <= 0) + return -XFS_ERROR(EINVAL); + + if (bulkreq.ubuffer == NULL) + return -XFS_ERROR(EINVAL); + + if (cmd == XFS_IOC_FSINUMBERS_32) { + error = xfs_inumbers(mp, &inlast, &count, + bulkreq.ubuffer, xfs_inumbers_fmt_compat); + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { + int res; + + error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, + sizeof(compat_xfs_bstat_t), 0, &res); + } else if (cmd == XFS_IOC_FSBULKSTAT_32) { + error = xfs_bulkstat(mp, &inlast, &count, + xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t), + bulkreq.ubuffer, &done); + } else + error = XFS_ERROR(EINVAL); + if (error) + return -error; + + if (bulkreq.ocount != NULL) { + if (copy_to_user(bulkreq.lastip, &inlast, + sizeof(xfs_ino_t))) + return -XFS_ERROR(EFAULT); + + if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) + return -XFS_ERROR(EFAULT); + } + + return 0; +} + +STATIC int +xfs_compat_handlereq_copyin( + xfs_fsop_handlereq_t *hreq, + compat_xfs_fsop_handlereq_t __user *arg32) +{ + compat_xfs_fsop_handlereq_t hreq32; + + if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + hreq->fd = hreq32.fd; + hreq->path = compat_ptr(hreq32.path); + hreq->oflags = hreq32.oflags; + hreq->ihandle = compat_ptr(hreq32.ihandle); + hreq->ihandlen = hreq32.ihandlen; + hreq->ohandle = compat_ptr(hreq32.ohandle); + hreq->ohandlen = compat_ptr(hreq32.ohandlen); + + return 0; +} + +STATIC struct dentry * +xfs_compat_handlereq_to_dentry( + struct file *parfilp, + compat_xfs_fsop_handlereq_t *hreq) +{ + return xfs_handle_to_dentry(parfilp, + compat_ptr(hreq->ihandle), hreq->ihandlen); +} + +STATIC int +xfs_compat_attrlist_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t al_hreq; + struct dentry *dentry; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, + sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = -ENOMEM; + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_dput; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_dput: + dput(dentry); + return error; +} + +STATIC int +xfs_compat_attrmulti_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + compat_xfs_attr_multiop_t *ops; + compat_xfs_fsop_attrmulti_handlereq_t am_hreq; + struct dentry *dentry; + unsigned int i, size; + unsigned char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, + sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + /* overflow check */ + if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t)) + return -E2BIG; + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + error = E2BIG; + size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_dput; + + ops = memdup_user(compat_ptr(am_hreq.ops), size); + if (IS_ERR(ops)) { + error = PTR_ERR(ops); + goto out_dput; + } + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user((char *)attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get( + dentry->d_inode, attr_name, + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_set( + dentry->d_inode, attr_name, + compat_ptr(ops[i].am_attrvalue), + ops[i].am_length, ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = mnt_want_write(parfilp->f_path.mnt); + if (ops[i].am_error) + break; + ops[i].am_error = xfs_attrmulti_attr_remove( + dentry->d_inode, attr_name, + ops[i].am_flags); + mnt_drop_write(parfilp->f_path.mnt); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_dput: + dput(dentry); + return -error; +} + +STATIC int +xfs_compat_fssetdm_by_handle( + struct file *parfilp, + void __user *arg) +{ + int error; + struct fsdmidata fsd; + compat_xfs_fsop_setdm_handlereq_t dmhreq; + struct dentry *dentry; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, + sizeof(compat_xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + +out: + dput(dentry); + return error; +} + +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + trace_xfs_file_compat_ioctl(ip); + + switch (cmd) { + /* No size or alignment issues on any arch */ + case XFS_IOC_DIOINFO: + case XFS_IOC_FSGEOMETRY: + case XFS_IOC_FSGETXATTR: + case XFS_IOC_FSSETXATTR: + case XFS_IOC_FSGETXATTRA: + case XFS_IOC_FSSETDM: + case XFS_IOC_GETBMAP: + case XFS_IOC_GETBMAPA: + case XFS_IOC_GETBMAPX: + case XFS_IOC_FSCOUNTS: + case XFS_IOC_SET_RESBLKS: + case XFS_IOC_GET_RESBLKS: + case XFS_IOC_FSGROWFSLOG: + case XFS_IOC_GOINGDOWN: + case XFS_IOC_ERROR_INJECTION: + case XFS_IOC_ERROR_CLEARALL: + return xfs_file_ioctl(filp, cmd, p); +#ifndef BROKEN_X86_ALIGNMENT + /* These are handled fine if no alignment issues */ + case XFS_IOC_ALLOCSP: + case XFS_IOC_FREESP: + case XFS_IOC_RESVSP: + case XFS_IOC_UNRESVSP: + case XFS_IOC_ALLOCSP64: + case XFS_IOC_FREESP64: + case XFS_IOC_RESVSP64: + case XFS_IOC_UNRESVSP64: + case XFS_IOC_FSGEOMETRY_V1: + case XFS_IOC_FSGROWFSDATA: + case XFS_IOC_FSGROWFSRT: + case XFS_IOC_ZERO_RANGE: + return xfs_file_ioctl(filp, cmd, p); +#else + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: + case XFS_IOC_ZERO_RANGE_32: { + struct xfs_flock64 bf; + + if (xfs_compat_flock64_copyin(&bf, arg)) + return -XFS_ERROR(EFAULT); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } + case XFS_IOC_FSGEOMETRY_V1_32: + return xfs_compat_ioc_fsgeometry_v1(mp, arg); + case XFS_IOC_FSGROWFSDATA_32: { + struct xfs_growfs_data in; + + if (xfs_compat_growfs_data_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_data(mp, &in); + return -error; + } + case XFS_IOC_FSGROWFSRT_32: { + struct xfs_growfs_rt in; + + if (xfs_compat_growfs_rt_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_rt(mp, &in); + return -error; + } +#endif + /* long changes size, but xfs only copiese out 32 bits */ + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: + cmd = _NATIVE_IOC(cmd, long); + return xfs_file_ioctl(filp, cmd, p); + case XFS_IOC_SWAPEXT_32: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then copy bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(struct xfs_swapext, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } + case XFS_IOC_FSBULKSTAT_32: + case XFS_IOC_FSBULKSTAT_SINGLE_32: + case XFS_IOC_FSINUMBERS_32: + return xfs_compat_ioc_bulkstat(mp, cmd, arg); + case XFS_IOC_FD_TO_HANDLE_32: + case XFS_IOC_PATH_TO_HANDLE_32: + case XFS_IOC_PATH_TO_FSHANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(filp, &hreq); + } + case XFS_IOC_READLINK_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(filp, &hreq); + } + case XFS_IOC_ATTRLIST_BY_HANDLE_32: + return xfs_compat_attrlist_by_handle(filp, arg); + case XFS_IOC_ATTRMULTI_BY_HANDLE_32: + return xfs_compat_attrmulti_by_handle(filp, arg); + case XFS_IOC_FSSETDM_BY_HANDLE_32: + return xfs_compat_fssetdm_by_handle(filp, arg); + default: + return -XFS_ERROR(ENOIOCTLCMD); + } +} diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h new file mode 100644 index 000000000000..80f4060e8970 --- /dev/null +++ b/fs/xfs/xfs_ioctl32.h @@ -0,0 +1,237 @@ +/* + * Copyright (c) 2004-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL32_H__ +#define __XFS_IOCTL32_H__ + +#include + +/* + * on 32-bit arches, ioctl argument structures may have different sizes + * and/or alignment. We define compat structures which match the + * 32-bit sizes/alignments here, and their associated ioctl numbers. + * + * xfs_ioctl32.c contains routines to copy these structures in and out. + */ + +/* stock kernel-level ioctls we support */ +#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS +#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS +#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION + +/* + * On intel, even if sizes match, alignment and/or padding may differ. + */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define BROKEN_X86_ALIGNMENT +#define __compat_packed __attribute__((packed)) +#else +#define __compat_packed +#endif + +typedef struct compat_xfs_bstime { + compat_time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid_lo; /* lower part of project id */ +#define bs_projid bs_projid_lo /* (previously just bs_projid) */ + __u16 bs_projid_hi; /* high part of project id */ + unsigned char bs_pad[12]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} __compat_packed compat_xfs_bstat_t; + +typedef struct compat_xfs_fsop_bulkreq { + compat_uptr_t lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + compat_uptr_t ubuffer; /* user buffer for inode desc. */ + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; + +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +/* The bstat field in the swapext struct needs translation */ +typedef struct compat_xfs_swapext { + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} __compat_packed compat_xfs_swapext_t; + +#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) + +typedef struct compat_xfs_fsop_attrlist_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + __u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + compat_uptr_t buffer; /* returned names */ +} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; + +/* Note: actually this is read/write */ +#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ + _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) + +/* am_opcodes defined in xfs_fs.h */ +typedef struct compat_xfs_attr_multiop { + __u32 am_opcode; + __s32 am_error; + compat_uptr_t am_attrname; + compat_uptr_t am_attrvalue; + __u32 am_length; + __u32 am_flags; +} compat_xfs_attr_multiop_t; + +typedef struct compat_xfs_fsop_attrmulti_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + /* ptr to compat_xfs_attr_multiop */ + compat_uptr_t ops; /* attr_multi data */ +} compat_xfs_fsop_attrmulti_handlereq_t; + +#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ + _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) + +typedef struct compat_xfs_fsop_setdm_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle information */ + /* ptr to struct fsdmidata */ + compat_uptr_t data; /* DMAPI data */ +} compat_xfs_fsop_setdm_handlereq_t; + +#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ + _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) + +#ifdef BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct compat_xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} compat_xfs_flock64_t; + +#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) +#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) +#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) +#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) +#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) + +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR('X', 100, struct compat_xfs_fsop_geom_v1) + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +/* These growfs input structures have padding on the end, so must translate */ +typedef struct compat_xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} __attribute__((packed)) compat_xfs_growfs_data_t; + +typedef struct compat_xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} __attribute__((packed)) compat_xfs_growfs_rt_t; + +#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) +#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) + +#endif /* BROKEN_X86_ALIGNMENT */ + +#endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c new file mode 100644 index 000000000000..b9c172b3fbbe --- /dev/null +++ b/fs/xfs/xfs_iops.c @@ -0,0 +1,1210 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_acl.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_rw.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_vnodeops.h" +#include "xfs_inode_item.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include + +/* + * Bring the timestamps in the XFS inode uptodate. + * + * Used before writing the inode to disk. + */ +void +xfs_synchronize_times( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; +} + +/* + * If the linux inode is valid, mark it dirty. + * Used when committing a dirty inode into a transaction so that + * the inode will get written back by the linux code + */ +void +xfs_mark_inode_dirty_sync( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty_sync(inode); +} + +void +xfs_mark_inode_dirty( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + if (!(inode->i_state & (I_WILL_FREE|I_FREEING))) + mark_inode_dirty(inode); +} + +/* + * Hook in SELinux. This is not quite correct yet, what we really need + * here (as we do for default ACLs) is a mechanism by which creation of + * these attrs can be journalled at inode creation time (along with the + * inode, of course, such that log replay can't cause these to be lost). + */ +STATIC int +xfs_init_security( + struct inode *inode, + struct inode *dir, + const struct qstr *qstr) +{ + struct xfs_inode *ip = XFS_I(inode); + size_t length; + void *value; + unsigned char *name; + int error; + + error = security_inode_init_security(inode, dir, qstr, (char **)&name, + &value, &length); + if (error) { + if (error == -EOPNOTSUPP) + return 0; + return -error; + } + + error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); + + kfree(name); + kfree(value); + return error; +} + +static void +xfs_dentry_to_name( + struct xfs_name *namep, + struct dentry *dentry) +{ + namep->name = dentry->d_name.name; + namep->len = dentry->d_name.len; +} + +STATIC void +xfs_cleanup_inode( + struct inode *dir, + struct inode *inode, + struct dentry *dentry) +{ + struct xfs_name teardown; + + /* Oh, the horror. + * If we can't add the ACL or we fail in + * xfs_init_security we must back out. + * ENOSPC can hit here, among other things. + */ + xfs_dentry_to_name(&teardown, dentry); + + xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); + iput(inode); +} + +STATIC int +xfs_vn_mknod( + struct inode *dir, + struct dentry *dentry, + int mode, + dev_t rdev) +{ + struct inode *inode; + struct xfs_inode *ip = NULL; + struct posix_acl *default_acl = NULL; + struct xfs_name name; + int error; + + /* + * Irix uses Missed'em'V split, but doesn't want to see + * the upper 5 bits of (14bit) major. + */ + if (S_ISCHR(mode) || S_ISBLK(mode)) { + if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff)) + return -EINVAL; + rdev = sysv_encode_dev(rdev); + } else { + rdev = 0; + } + + if (IS_POSIXACL(dir)) { + default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(default_acl)) + return PTR_ERR(default_acl); + + if (!default_acl) + mode &= ~current_umask(); + } + + xfs_dentry_to_name(&name, dentry); + error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); + if (unlikely(error)) + goto out_free_acl; + + inode = VFS_I(ip); + + error = xfs_init_security(inode, dir, &dentry->d_name); + if (unlikely(error)) + goto out_cleanup_inode; + + if (default_acl) { + error = -xfs_inherit_acl(inode, default_acl); + default_acl = NULL; + if (unlikely(error)) + goto out_cleanup_inode; + } + + + d_instantiate(dentry, inode); + return -error; + + out_cleanup_inode: + xfs_cleanup_inode(dir, inode, dentry); + out_free_acl: + posix_acl_release(default_acl); + return -error; +} + +STATIC int +xfs_vn_create( + struct inode *dir, + struct dentry *dentry, + int mode, + struct nameidata *nd) +{ + return xfs_vn_mknod(dir, dentry, mode, 0); +} + +STATIC int +xfs_vn_mkdir( + struct inode *dir, + struct dentry *dentry, + int mode) +{ + return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0); +} + +STATIC struct dentry * +xfs_vn_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct xfs_inode *cip; + struct xfs_name name; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + xfs_dentry_to_name(&name, dentry); + error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); + if (unlikely(error)) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + d_add(dentry, NULL); + return NULL; + } + + return d_splice_alias(VFS_I(cip), dentry); +} + +STATIC struct dentry * +xfs_vn_ci_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct xfs_inode *ip; + struct xfs_name xname; + struct xfs_name ci_name; + struct qstr dname; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + xfs_dentry_to_name(&xname, dentry); + error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); + if (unlikely(error)) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + /* + * call d_add(dentry, NULL) here when d_drop_negative_children + * is called in xfs_vn_mknod (ie. allow negative dentries + * with CI filesystems). + */ + return NULL; + } + + /* if exact match, just splice and exit */ + if (!ci_name.name) + return d_splice_alias(VFS_I(ip), dentry); + + /* else case-insensitive match... */ + dname.name = ci_name.name; + dname.len = ci_name.len; + dentry = d_add_ci(dentry, VFS_I(ip), &dname); + kmem_free(ci_name.name); + return dentry; +} + +STATIC int +xfs_vn_link( + struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + struct inode *inode = old_dentry->d_inode; + struct xfs_name name; + int error; + + xfs_dentry_to_name(&name, dentry); + + error = xfs_link(XFS_I(dir), XFS_I(inode), &name); + if (unlikely(error)) + return -error; + + ihold(inode); + d_instantiate(dentry, inode); + return 0; +} + +STATIC int +xfs_vn_unlink( + struct inode *dir, + struct dentry *dentry) +{ + struct xfs_name name; + int error; + + xfs_dentry_to_name(&name, dentry); + + error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + if (error) + return error; + + /* + * With unlink, the VFS makes the dentry "negative": no inode, + * but still hashed. This is incompatible with case-insensitive + * mode, so invalidate (unhash) the dentry in CI-mode. + */ + if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) + d_invalidate(dentry); + return 0; +} + +STATIC int +xfs_vn_symlink( + struct inode *dir, + struct dentry *dentry, + const char *symname) +{ + struct inode *inode; + struct xfs_inode *cip = NULL; + struct xfs_name name; + int error; + mode_t mode; + + mode = S_IFLNK | + (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); + xfs_dentry_to_name(&name, dentry); + + error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); + if (unlikely(error)) + goto out; + + inode = VFS_I(cip); + + error = xfs_init_security(inode, dir, &dentry->d_name); + if (unlikely(error)) + goto out_cleanup_inode; + + d_instantiate(dentry, inode); + return 0; + + out_cleanup_inode: + xfs_cleanup_inode(dir, inode, dentry); + out: + return -error; +} + +STATIC int +xfs_vn_rename( + struct inode *odir, + struct dentry *odentry, + struct inode *ndir, + struct dentry *ndentry) +{ + struct inode *new_inode = ndentry->d_inode; + struct xfs_name oname; + struct xfs_name nname; + + xfs_dentry_to_name(&oname, odentry); + xfs_dentry_to_name(&nname, ndentry); + + return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), + XFS_I(ndir), &nname, new_inode ? + XFS_I(new_inode) : NULL); +} + +/* + * careful here - this function can get called recursively, so + * we need to be very careful about how much stack we use. + * uio is kmalloced for this reason... + */ +STATIC void * +xfs_vn_follow_link( + struct dentry *dentry, + struct nameidata *nd) +{ + char *link; + int error = -ENOMEM; + + link = kmalloc(MAXPATHLEN+1, GFP_KERNEL); + if (!link) + goto out_err; + + error = -xfs_readlink(XFS_I(dentry->d_inode), link); + if (unlikely(error)) + goto out_kfree; + + nd_set_link(nd, link); + return NULL; + + out_kfree: + kfree(link); + out_err: + nd_set_link(nd, ERR_PTR(error)); + return NULL; +} + +STATIC void +xfs_vn_put_link( + struct dentry *dentry, + struct nameidata *nd, + void *p) +{ + char *s = nd_get_link(nd); + + if (!IS_ERR(s)) + kfree(s); +} + +STATIC int +xfs_vn_getattr( + struct vfsmount *mnt, + struct dentry *dentry, + struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + + trace_xfs_getattr(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + stat->size = XFS_ISIZE(ip); + stat->dev = inode->i_sb->s_dev; + stat->mode = ip->i_d.di_mode; + stat->nlink = ip->i_d.di_nlink; + stat->uid = ip->i_d.di_uid; + stat->gid = ip->i_d.di_gid; + stat->ino = ip->i_ino; + stat->atime = inode->i_atime; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; + stat->blocks = + XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); + + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + stat->blksize = BLKDEV_IOSIZE; + stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * If the file blocks are being allocated from a + * realtime volume, then return the inode's realtime + * extent size or the realtime volume's extent size. + */ + stat->blksize = + xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog; + } else + stat->blksize = xfs_preferred_iosize(mp); + stat->rdev = 0; + break; + } + + return 0; +} + +int +xfs_setattr_nonsize( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + xfs_trans_t *tp; + int error; + uid_t uid = 0, iuid = 0; + gid_t gid = 0, igid = 0; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT((mask & ATTR_SIZE) == 0); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { + uint qflags = 0; + + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; + qflags |= XFS_QMOPT_UQUOTA; + } else { + uid = ip->i_d.di_uid; + } + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; + qflags |= XFS_QMOPT_GQUOTA; + } else { + gid = ip->i_d.di_gid; + } + + /* + * We take a reference when we initialize udqp and gdqp, + * so it is important that we never blindly double trip on + * the same variable. See xfs_create() for an example. + */ + ASSERT(udqp == NULL); + ASSERT(gdqp == NULL); + error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), + qflags, &udqp, &gdqp); + if (error) + return error; + } + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) + goto out_dqrele; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * These IDs could have changed since we last looked at them. + * But, we're assured that if the ownership did change + * while we didn't have the inode locked, inode's dquot(s) + * would have changed also. + */ + iuid = ip->i_d.di_uid; + igid = ip->i_d.di_gid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; + + /* + * Do a quota reservation only if uid/gid is actually + * going to change. + */ + if (XFS_IS_QUOTA_RUNNING(mp) && + ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || + (XFS_IS_GQUOTA_ON(mp) && igid != gid))) { + ASSERT(tp); + error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (error) /* out of quota */ + goto out_trans_cancel; + } + } + + xfs_trans_ijoin(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + */ + if (mask & (ATTR_UID|ATTR_GID)) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (iuid != uid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) { + ASSERT(mask & ATTR_UID); + ASSERT(udqp); + olddquot1 = xfs_qm_vop_chown(tp, ip, + &ip->i_udquot, udqp); + } + ip->i_d.di_uid = uid; + inode->i_uid = uid; + } + if (igid != gid) { + if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { + ASSERT(!XFS_IS_PQUOTA_ON(mp)); + ASSERT(mask & ATTR_GID); + ASSERT(gdqp); + olddquot2 = xfs_qm_vop_chown(tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_gid = gid; + inode->i_gid = gid; + } + } + + /* + * Change file access modes. + */ + if (mask & ATTR_MODE) { + umode_t mode = iattr->ia_mode; + + if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + mode &= ~S_ISGID; + + ip->i_d.di_mode &= S_IFMT; + ip->i_d.di_mode |= mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= mode & ~S_IFMT; + } + + /* + * Change file access or modified times. + */ + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + xfs_qm_dqrele(olddquot1); + xfs_qm_dqrele(olddquot2); + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + + if (error) + return XFS_ERROR(error); + + /* + * XXX(hch): Updating the ACL entries is not atomic vs the i_mode + * update. We could avoid this with linked transactions + * and passing down the transaction pointer all the way + * to attr_set. No previous user of the generic + * Posix ACL code seems to care about this issue either. + */ + if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + error = -xfs_acl_chmod(inode); + if (error) + return XFS_ERROR(error); + } + + return 0; + +out_trans_cancel: + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); +out_dqrele: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + return error; +} + +/* + * Truncate file. Must have write permission and not be a directory. + */ +int +xfs_setattr_size( + struct xfs_inode *ip, + struct iattr *iattr, + int flags) +{ + struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; + struct xfs_trans *tp; + int error; + uint lock_flags; + uint commit_flags = 0; + + trace_xfs_setattr(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + error = -inode_change_ok(inode, iattr); + if (error) + return XFS_ERROR(error); + + ASSERT(S_ISREG(ip->i_d.di_mode)); + ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| + ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID| + ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + + lock_flags = XFS_ILOCK_EXCL; + if (!(flags & XFS_ATTR_NOLOCK)) + lock_flags |= XFS_IOLOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * Short circuit the truncate case for zero length files. + */ + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { + if (!(mask & (ATTR_CTIME|ATTR_MTIME))) + goto out_unlock; + + /* + * Use the regular setattr path to update the timestamps. + */ + xfs_iunlock(ip, lock_flags); + iattr->ia_valid &= ~ATTR_SIZE; + return xfs_setattr_nonsize(ip, iattr, 0); + } + + /* + * Make sure that the dquots are attached to the inode. + */ + error = xfs_qm_dqattach_locked(ip, 0); + if (error) + goto out_unlock; + + /* + * Now we can make the changes. Before we join the inode to the + * transaction, take care of the part of the truncation that must be + * done without the inode lock. This needs to be done before joining + * the inode to the transaction, because the inode cannot be unlocked + * once it is a part of the transaction. + */ + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data in the + * last block that is beyond the old EOF. We need to do this + * before the inode is joined to the transaction to modify + * i_size. + */ + error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); + if (error) + goto out_unlock; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + lock_flags &= ~XFS_ILOCK_EXCL; + + /* + * We are going to log the inode size change in this transaction so + * any previous writes that are beyond the on disk EOF and the new + * EOF that have not been written out need to be written here. If we + * do not write the data out, we expose ourselves to the null files + * problem. + * + * Only flush from the on disk size to the smaller of the in memory + * file size or the new size as that's the range we really care about + * here and prevents waiting for other data not within the range we + * care about here. + */ + if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) { + error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size, + XBF_ASYNC, FI_NONE); + if (error) + goto out_unlock; + } + + /* + * Wait for all I/O to complete. + */ + xfs_ioend_wait(ip); + + error = -block_truncate_page(inode->i_mapping, iattr->ia_size, + xfs_get_blocks); + if (error) + goto out_unlock; + + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) + goto out_trans_cancel; + + truncate_setsize(inode, iattr->ia_size); + + commit_flags = XFS_TRANS_RELEASE_LOG_RES; + lock_flags |= XFS_ILOCK_EXCL; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip); + + /* + * Only change the c/mtime if we are changing the size or we are + * explicitly asked to change it. This handles the semantic difference + * between truncate() and ftruncate() as implemented in the VFS. + * + * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a + * special case where we need to update the times despite not having + * these flags set. For all other operations the VFS set these flags + * explicitly if it wants a timestamp update. + */ + if (iattr->ia_size != ip->i_size && + (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { + iattr->ia_ctime = iattr->ia_mtime = + current_fs_time(inode->i_sb); + mask |= ATTR_CTIME | ATTR_MTIME; + } + + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { + error = xfs_itruncate_data(&tp, ip, iattr->ia_size); + if (error) + goto out_trans_abort; + + /* + * Truncated "down", so we're removing references to old data + * here - if we delay flushing for a long time, we expose + * ourselves unduly to the notorious NULL files problem. So, + * we mark this inode and flush it when the file is closed, + * and do not wait the usual (long) time for writeout. + */ + xfs_iflags_set(ip, XFS_ITRUNCATED); + } + + if (mask & ATTR_CTIME) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; + ip->i_update_core = 1; + } + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; + ip->i_update_core = 1; + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + XFS_STATS_INC(xs_ig_attrchg); + + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +out_unlock: + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return error; + +out_trans_abort: + commit_flags |= XFS_TRANS_ABORT; +out_trans_cancel: + xfs_trans_cancel(tp, commit_flags); + goto out_unlock; +} + +STATIC int +xfs_vn_setattr( + struct dentry *dentry, + struct iattr *iattr) +{ + if (iattr->ia_valid & ATTR_SIZE) + return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); + return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); +} + +#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +/* + * Call fiemap helper to fill in user data. + * Returns positive errors to xfs_getbmap. + */ +STATIC int +xfs_fiemap_format( + void **arg, + struct getbmapx *bmv, + int *full) +{ + int error; + struct fiemap_extent_info *fieinfo = *arg; + u32 fiemap_flags = 0; + u64 logical, physical, length; + + /* Do nothing for a hole */ + if (bmv->bmv_block == -1LL) + return 0; + + logical = BBTOB(bmv->bmv_offset); + physical = BBTOB(bmv->bmv_block); + length = BBTOB(bmv->bmv_length); + + if (bmv->bmv_oflags & BMV_OF_PREALLOC) + fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; + else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { + fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + physical = 0; /* no block yet */ + } + if (bmv->bmv_oflags & BMV_OF_LAST) + fiemap_flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, fiemap_flags); + if (error > 0) { + error = 0; + *full = 1; /* user array now full */ + } + + return -error; +} + +STATIC int +xfs_vn_fiemap( + struct inode *inode, + struct fiemap_extent_info *fieinfo, + u64 start, + u64 length) +{ + xfs_inode_t *ip = XFS_I(inode); + struct getbmapx bm; + int error; + + error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); + if (error) + return error; + + /* Set up bmap header for xfs internal routine */ + bm.bmv_offset = BTOBB(start); + /* Special case for whole file */ + if (length == FIEMAP_MAX_OFFSET) + bm.bmv_length = -1LL; + else + bm.bmv_length = BTOBB(length); + + /* We add one because in getbmap world count includes the header */ + bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM : + fieinfo->fi_extents_max + 1; + bm.bmv_count = min_t(__s32, bm.bmv_count, + (PAGE_SIZE * 16 / sizeof(struct getbmapx))); + bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + bm.bmv_iflags |= BMV_IF_ATTRFORK; + if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) + bm.bmv_iflags |= BMV_IF_DELALLOC; + + error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); + if (error) + return -error; + + return 0; +} + +static const struct inode_operations xfs_inode_operations = { + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, + .fiemap = xfs_vn_fiemap, +}; + +static const struct inode_operations xfs_dir_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +static const struct inode_operations xfs_dir_ci_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_ci_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +static const struct inode_operations xfs_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = xfs_vn_follow_link, + .put_link = xfs_vn_put_link, + .get_acl = xfs_get_acl, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +STATIC void +xfs_diflags_to_iflags( + struct inode *inode, + struct xfs_inode *ip) +{ + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +/* + * Initialize the Linux inode, set up the operation vectors and + * unlock the inode. + * + * When reading existing inodes from disk this is called directly + * from xfs_iget, when creating a new inode it is called from + * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. + */ +void +xfs_setup_inode( + struct xfs_inode *ip) +{ + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW; + + inode_sb_list_add(inode); + /* make the inode look hashed for the writeback code */ + hlist_add_fake(&inode->i_hash); + + inode->i_mode = ip->i_d.di_mode; + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + inode->i_rdev = 0; + break; + } + + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + xfs_diflags_to_iflags(inode, ip); + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &xfs_inode_operations; + inode->i_fop = &xfs_file_operations; + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + case S_IFDIR: + if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + inode->i_op = &xfs_dir_ci_inode_operations; + else + inode->i_op = &xfs_dir_inode_operations; + inode->i_fop = &xfs_dir_file_operations; + break; + case S_IFLNK: + inode->i_op = &xfs_symlink_inode_operations; + if (!(ip->i_df.if_flags & XFS_IFINLINE)) + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + default: + inode->i_op = &xfs_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; + } + + /* + * If there is no attribute fork no ACL can exist on this inode, + * and it can't have any file capabilities attached to it either. + */ + if (!XFS_IFORK_Q(ip)) { + inode_has_no_xattr(inode); + cache_no_acl(inode); + } + + xfs_iflags_clear(ip, XFS_INEW); + barrier(); + + unlock_new_inode(inode); +} diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h new file mode 100644 index 000000000000..ef41c92ce66e --- /dev/null +++ b/fs/xfs/xfs_iops.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOPS_H__ +#define __XFS_IOPS_H__ + +struct xfs_inode; + +extern const struct file_operations xfs_file_operations; +extern const struct file_operations xfs_dir_file_operations; + +extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); + +extern void xfs_setup_inode(struct xfs_inode *); + +#endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h new file mode 100644 index 000000000000..1e8a45e74c3e --- /dev/null +++ b/fs/xfs/xfs_linux.h @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_LINUX__ +#define __XFS_LINUX__ + +#include + +/* + * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. + * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. + */ +#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64) +# define XFS_BIG_BLKNOS 1 +# define XFS_BIG_INUMS 1 +#else +# define XFS_BIG_BLKNOS 0 +# define XFS_BIG_INUMS 0 +#endif + +#include "xfs_types.h" + +#include "kmem.h" +#include "mrlock.h" +#include "time.h" +#include "uuid.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "xfs_vnode.h" +#include "xfs_stats.h" +#include "xfs_sysctl.h" +#include "xfs_iops.h" +#include "xfs_aops.h" +#include "xfs_super.h" +#include "xfs_buf.h" +#include "xfs_message.h" + +#ifdef __BIG_ENDIAN +#define XFS_NATIVE_HOST 1 +#else +#undef XFS_NATIVE_HOST +#endif + +/* + * Feature macros (disable/enable) + */ +#ifdef CONFIG_SMP +#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#else +#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ +#endif + +#define irix_sgid_inherit xfs_params.sgid_inherit.val +#define irix_symlink_mode xfs_params.symlink_mode.val +#define xfs_panic_mask xfs_params.panic_mask.val +#define xfs_error_level xfs_params.error_level.val +#define xfs_syncd_centisecs xfs_params.syncd_timer.val +#define xfs_stats_clear xfs_params.stats_clear.val +#define xfs_inherit_sync xfs_params.inherit_sync.val +#define xfs_inherit_nodump xfs_params.inherit_nodump.val +#define xfs_inherit_noatime xfs_params.inherit_noatim.val +#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val +#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val +#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val +#define xfs_rotorstep xfs_params.rotorstep.val +#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val +#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val + +#define current_cpu() (raw_smp_processor_id()) +#define current_pid() (current->pid) +#define current_test_flags(f) (current->flags & (f)) +#define current_set_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags |= (f)) +#define current_clear_flags_nested(sp, f) \ + (*(sp) = current->flags, current->flags &= ~(f)) +#define current_restore_flags_nested(sp, f) \ + (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) + +#define spinlock_destroy(lock) + +#define NBBY 8 /* number of bits per byte */ + +/* + * Size of block device i/o is parameterized here. + * Currently the system supports page-sized i/o. + */ +#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT +#define BLKDEV_IOSIZE (1<> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + *(__u64 *)a = c; + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + unsigned long __upper, __low, __high, __mod; + __u64 c = *(__u64 *)a; + __upper = __high = c >> 32; + __low = c; + if (__high) { + __upper = __high % (b); + __high = __high / (b); + } + asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper)); + asm("":"=A" (c):"a" (__low),"d" (__high)); + return __mod; + } + } + + /* NOTREACHED */ + return 0; +} +#else +static inline __u32 xfs_do_div(void *a, __u32 b, int n) +{ + __u32 mod; + + switch (n) { + case 4: + mod = *(__u32 *)a % b; + *(__u32 *)a = *(__u32 *)a / b; + return mod; + case 8: + mod = do_div(*(__u64 *)a, b); + return mod; + } + + /* NOTREACHED */ + return 0; +} + +/* Side effect free 64 bit mod operation */ +static inline __u32 xfs_do_mod(void *a, __u32 b, int n) +{ + switch (n) { + case 4: + return *(__u32 *)a % b; + case 8: + { + __u64 c = *(__u64 *)a; + return do_div(c, b); + } + } + + /* NOTREACHED */ + return 0; +} +#endif + +#undef do_div +#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a)) +#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a)) + +static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return(x * y); +} + +static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) +{ + x += y - 1; + do_div(x, y); + return x; +} + +/* ARM old ABI has some weird alignment/padding */ +#if defined(__arm__) && !defined(__ARM_EABI__) +#define __arch_pack __attribute__((packed)) +#else +#define __arch_pack +#endif + +#define ASSERT_ALWAYS(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef DEBUG +#define ASSERT(expr) ((void)0) + +#ifndef STATIC +# define STATIC static noinline +#endif + +#else /* DEBUG */ + +#define ASSERT(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + +#ifndef STATIC +# define STATIC noinline +#endif + +#endif /* DEBUG */ + +#endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c new file mode 100644 index 000000000000..bd672def95ac --- /dev/null +++ b/fs/xfs/xfs_message.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" + +/* + * XFS logging functions + */ +static void +__xfs_printk( + const char *level, + const struct xfs_mount *mp, + struct va_format *vaf) +{ + if (mp && mp->m_fsname) { + printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf); + return; + } + printk("%sXFS: %pV\n", level, vaf); +} + +#define define_xfs_printk_level(func, kern_level) \ +void func(const struct xfs_mount *mp, const char *fmt, ...) \ +{ \ + struct va_format vaf; \ + va_list args; \ + \ + va_start(args, fmt); \ + \ + vaf.fmt = fmt; \ + vaf.va = &args; \ + \ + __xfs_printk(kern_level, mp, &vaf); \ + va_end(args); \ +} \ + +define_xfs_printk_level(xfs_emerg, KERN_EMERG); +define_xfs_printk_level(xfs_alert, KERN_ALERT); +define_xfs_printk_level(xfs_crit, KERN_CRIT); +define_xfs_printk_level(xfs_err, KERN_ERR); +define_xfs_printk_level(xfs_warn, KERN_WARNING); +define_xfs_printk_level(xfs_notice, KERN_NOTICE); +define_xfs_printk_level(xfs_info, KERN_INFO); +#ifdef DEBUG +define_xfs_printk_level(xfs_debug, KERN_DEBUG); +#endif + +void +xfs_alert_tag( + const struct xfs_mount *mp, + int panic_tag, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int do_panic = 0; + + if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { + xfs_alert(mp, "Transforming an alert into a BUG."); + do_panic = 1; + } + + va_start(args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + __xfs_printk(KERN_ALERT, mp, &vaf); + va_end(args); + + BUG_ON(do_panic); +} + +void +assfail(char *expr, char *file, int line) +{ + xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d", + expr, file, line); + BUG(); +} + +void +xfs_hex_dump(void *p, int length) +{ + print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); +} diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h new file mode 100644 index 000000000000..7fb7ea007672 --- /dev/null +++ b/fs/xfs/xfs_message.h @@ -0,0 +1,39 @@ +#ifndef __XFS_MESSAGE_H +#define __XFS_MESSAGE_H 1 + +struct xfs_mount; + +extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_alert_tag(const struct xfs_mount *mp, int tag, + const char *fmt, ...) + __attribute__ ((format (printf, 3, 4))); +extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); + +#ifdef DEBUG +extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))); +#else +static inline void +__attribute__ ((format (printf, 2, 3))) +xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) +{ +} +#endif + +extern void assfail(char *expr, char *f, int l); + +extern void xfs_hex_dump(void *p, int length); + +#endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c new file mode 100644 index 000000000000..9a0aa76facdf --- /dev/null +++ b/fs/xfs/xfs_qm.c @@ -0,0 +1,2416 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_space.h" +#include "xfs_utils.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + +/* + * The global quota manager. There is only one of these for the entire + * system, _not_ one per file system. XQM keeps track of the overall + * quota functionality, including maintaining the freelist and hash + * tables of dquots. + */ +struct mutex xfs_Gqm_lock; +struct xfs_qm *xfs_Gqm; +uint ndquot; + +kmem_zone_t *qm_dqzone; +kmem_zone_t *qm_dqtrxzone; + +STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); +STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); + +STATIC int xfs_qm_init_quotainos(xfs_mount_t *); +STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); +STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); + +static struct shrinker xfs_qm_shaker = { + .shrink = xfs_qm_shake, + .seeks = DEFAULT_SEEKS, +}; + +/* + * Initialize the XQM structure. + * Note that there is not one quota manager per file system. + */ +STATIC struct xfs_qm * +xfs_Gqm_init(void) +{ + xfs_dqhash_t *udqhash, *gdqhash; + xfs_qm_t *xqm; + size_t hsize; + uint i; + + /* + * Initialize the dquot hash tables. + */ + udqhash = kmem_zalloc_greedy(&hsize, + XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t)); + if (!udqhash) + goto out; + + gdqhash = kmem_zalloc_large(hsize); + if (!gdqhash) + goto out_free_udqhash; + + hsize /= sizeof(xfs_dqhash_t); + ndquot = hsize << 8; + + xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP); + xqm->qm_dqhashmask = hsize - 1; + xqm->qm_usr_dqhtable = udqhash; + xqm->qm_grp_dqhtable = gdqhash; + ASSERT(xqm->qm_usr_dqhtable != NULL); + ASSERT(xqm->qm_grp_dqhtable != NULL); + + for (i = 0; i < hsize; i++) { + xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i); + xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i); + } + + /* + * Freelist of all dquots of all file systems + */ + INIT_LIST_HEAD(&xqm->qm_dqfrlist); + xqm->qm_dqfrlist_cnt = 0; + mutex_init(&xqm->qm_dqfrlist_lock); + + /* + * dquot zone. we register our own low-memory callback. + */ + if (!qm_dqzone) { + xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t), + "xfs_dquots"); + qm_dqzone = xqm->qm_dqzone; + } else + xqm->qm_dqzone = qm_dqzone; + + register_shrinker(&xfs_qm_shaker); + + /* + * The t_dqinfo portion of transactions. + */ + if (!qm_dqtrxzone) { + xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t), + "xfs_dqtrx"); + qm_dqtrxzone = xqm->qm_dqtrxzone; + } else + xqm->qm_dqtrxzone = qm_dqtrxzone; + + atomic_set(&xqm->qm_totaldquots, 0); + xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; + xqm->qm_nrefs = 0; + return xqm; + + out_free_udqhash: + kmem_free_large(udqhash); + out: + return NULL; +} + +/* + * Destroy the global quota manager when its reference count goes to zero. + */ +STATIC void +xfs_qm_destroy( + struct xfs_qm *xqm) +{ + struct xfs_dquot *dqp, *n; + int hsize, i; + + ASSERT(xqm != NULL); + ASSERT(xqm->qm_nrefs == 0); + unregister_shrinker(&xfs_qm_shaker); + hsize = xqm->qm_dqhashmask + 1; + for (i = 0; i < hsize; i++) { + xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); + xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); + } + kmem_free_large(xqm->qm_usr_dqhtable); + kmem_free_large(xqm->qm_grp_dqhtable); + xqm->qm_usr_dqhtable = NULL; + xqm->qm_grp_dqhtable = NULL; + xqm->qm_dqhashmask = 0; + + /* frlist cleanup */ + mutex_lock(&xqm->qm_dqfrlist_lock); + list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { + xfs_dqlock(dqp); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + } + mutex_unlock(&xqm->qm_dqfrlist_lock); + mutex_destroy(&xqm->qm_dqfrlist_lock); + kmem_free(xqm); +} + +/* + * Called at mount time to let XQM know that another file system is + * starting quotas. This isn't crucial information as the individual mount + * structures are pretty independent, but it helps the XQM keep a + * global view of what's going on. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_hold_quotafs_ref( + struct xfs_mount *mp) +{ + /* + * Need to lock the xfs_Gqm structure for things like this. For example, + * the structure could disappear between the entry to this routine and + * a HOLD operation if not locked. + */ + mutex_lock(&xfs_Gqm_lock); + + if (!xfs_Gqm) { + xfs_Gqm = xfs_Gqm_init(); + if (!xfs_Gqm) { + mutex_unlock(&xfs_Gqm_lock); + return ENOMEM; + } + } + + /* + * We can keep a list of all filesystems with quotas mounted for + * debugging and statistical purposes, but ... + * Just take a reference and get out. + */ + xfs_Gqm->qm_nrefs++; + mutex_unlock(&xfs_Gqm_lock); + + return 0; +} + + +/* + * Release the reference that a filesystem took at mount time, + * so that we know when we need to destroy the entire quota manager. + */ +/* ARGSUSED */ +STATIC void +xfs_qm_rele_quotafs_ref( + struct xfs_mount *mp) +{ + xfs_dquot_t *dqp, *n; + + ASSERT(xfs_Gqm); + ASSERT(xfs_Gqm->qm_nrefs > 0); + + /* + * Go thru the freelist and destroy all inactive dquots. + */ + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + + list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) { + xfs_dqlock(dqp); + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(dqp->q_mount == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(list_empty(&dqp->q_hashlist)); + ASSERT(list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + xfs_dqunlock(dqp); + xfs_qm_dqdestroy(dqp); + } else { + xfs_dqunlock(dqp); + } + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + + /* + * Destroy the entire XQM. If somebody mounts with quotaon, this'll + * be restarted. + */ + mutex_lock(&xfs_Gqm_lock); + if (--xfs_Gqm->qm_nrefs == 0) { + xfs_qm_destroy(xfs_Gqm); + xfs_Gqm = NULL; + } + mutex_unlock(&xfs_Gqm_lock); +} + +/* + * Just destroy the quotainfo structure. + */ +void +xfs_qm_unmount( + struct xfs_mount *mp) +{ + if (mp->m_quotainfo) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + xfs_qm_destroy_quotainfo(mp); + } +} + + +/* + * This is called from xfs_mountfs to start quotas and initialize all + * necessary data structures like quotainfo. This is also responsible for + * running a quotacheck as necessary. We are guaranteed that the superblock + * is consistently read in at this point. + * + * If we fail here, the mount will continue with quota turned off. We don't + * need to inidicate success or failure at all. + */ +void +xfs_qm_mount_quotas( + xfs_mount_t *mp) +{ + int error = 0; + uint sbf; + + /* + * If quotas on realtime volumes is not supported, we disable + * quotas immediately. + */ + if (mp->m_sb.sb_rextents) { + xfs_notice(mp, "Cannot turn on quotas for realtime filesystem"); + mp->m_qflags = 0; + goto write_changes; + } + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Allocate the quotainfo structure inside the mount struct, and + * create quotainode(s), and change/rev superblock if necessary. + */ + error = xfs_qm_init_quotainfo(mp); + if (error) { + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo == NULL); + mp->m_qflags = 0; + goto write_changes; + } + /* + * If any of the quotas are not consistent, do a quotacheck. + */ + if (XFS_QM_NEED_QUOTACHECK(mp)) { + error = xfs_qm_quotacheck(mp); + if (error) { + /* Quotacheck failed and disabled quotas. */ + return; + } + } + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + if (!XFS_IS_UQUOTA_ON(mp)) + mp->m_qflags &= ~XFS_UQUOTA_CHKD; + if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp))) + mp->m_qflags &= ~XFS_OQUOTA_CHKD; + + write_changes: + /* + * We actually don't have to acquire the m_sb_lock at all. + * This can only be called from mount, and that's single threaded. XXX + */ + spin_lock(&mp->m_sb_lock); + sbf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) { + if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) { + /* + * We could only have been turning quotas off. + * We aren't in very good shape actually because + * the incore structures are convinced that quotas are + * off, but the on disk superblock doesn't know that ! + */ + ASSERT(!(XFS_IS_QUOTA_RUNNING(mp))); + xfs_alert(mp, "%s: Superblock update failed!", + __func__); + } + } + + if (error) { + xfs_warn(mp, "Failed to initialize disk quotas."); + return; + } +} + +/* + * Called from the vfsops layer. + */ +void +xfs_qm_unmount_quotas( + xfs_mount_t *mp) +{ + /* + * Release the dquots that root inode, et al might be holding, + * before we flush quotas and blow away the quotainfo structure. + */ + ASSERT(mp->m_rootip); + xfs_qm_dqdetach(mp->m_rootip); + if (mp->m_rbmip) + xfs_qm_dqdetach(mp->m_rbmip); + if (mp->m_rsumip) + xfs_qm_dqdetach(mp->m_rsumip); + + /* + * Release the quota inodes. + */ + if (mp->m_quotainfo) { + if (mp->m_quotainfo->qi_uquotaip) { + IRELE(mp->m_quotainfo->qi_uquotaip); + mp->m_quotainfo->qi_uquotaip = NULL; + } + if (mp->m_quotainfo->qi_gquotaip) { + IRELE(mp->m_quotainfo->qi_gquotaip); + mp->m_quotainfo->qi_gquotaip = NULL; + } + } +} + +/* + * Flush all dquots of the given file system to disk. The dquots are + * _not_ purged from memory here, just their data written to disk. + */ +STATIC int +xfs_qm_dqflush_all( + struct xfs_mount *mp, + int sync_mode) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + int recl; + struct xfs_dquot *dqp; + int error; + + if (!q) + return 0; +again: + mutex_lock(&q->qi_dqlist_lock); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = q->qi_dqreclaims; + if (!xfs_dqflock_nowait(dqp)) { + /* + * If we can't grab the flush lock then check + * to see if the dquot has been flushed delayed + * write. If so, grab its buffer and send it + * out immediately. We'll be able to acquire + * the flush lock when the I/O completes. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write. + */ + mutex_unlock(&q->qi_dqlist_lock); + error = xfs_qm_dqflush(dqp, sync_mode); + xfs_dqunlock(dqp); + if (error) + return error; + + mutex_lock(&q->qi_dqlist_lock); + if (recl != q->qi_dqreclaims) { + mutex_unlock(&q->qi_dqlist_lock); + /* XXX restart limit */ + goto again; + } + } + + mutex_unlock(&q->qi_dqlist_lock); + /* return ! busy */ + return 0; +} +/* + * Release the group dquot pointers the user dquots may be + * carrying around as a hint. mplist is locked on entry and exit. + */ +STATIC void +xfs_qm_detach_gdquots( + struct xfs_mount *mp) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *gdqp; + int nrecl; + + again: + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + xfs_dqlock(dqp); + if ((gdqp = dqp->q_gdquot)) { + xfs_dqlock(gdqp); + dqp->q_gdquot = NULL; + } + xfs_dqunlock(dqp); + + if (gdqp) { + /* + * Can't hold the mplist lock across a dqput. + * XXXmust convert to marker based iterations here. + */ + nrecl = q->qi_dqreclaims; + mutex_unlock(&q->qi_dqlist_lock); + xfs_qm_dqput(gdqp); + + mutex_lock(&q->qi_dqlist_lock); + if (nrecl != q->qi_dqreclaims) + goto again; + } + } +} + +/* + * Go through all the incore dquots of this file system and take them + * off the mplist and hashlist, if the dquot type matches the dqtype + * parameter. This is used when turning off quota accounting for + * users and/or groups, as well as when the filesystem is unmounting. + */ +STATIC int +xfs_qm_dqpurge_int( + struct xfs_mount *mp, + uint flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_dquot *dqp, *n; + uint dqtype; + int nrecl; + int nmisses; + + if (!q) + return 0; + + dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0; + dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0; + dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0; + + mutex_lock(&q->qi_dqlist_lock); + + /* + * In the first pass through all incore dquots of this filesystem, + * we release the group dquot pointers the user dquots may be + * carrying around as a hint. We need to do this irrespective of + * what's being turned off. + */ + xfs_qm_detach_gdquots(mp); + + again: + nmisses = 0; + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + /* + * Try to get rid of all of the unwanted dquots. The idea is to + * get them off mplist and hashlist, but leave them on freelist. + */ + list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) { + /* + * It's OK to look at the type without taking dqlock here. + * We're holding the mplist lock here, and that's needed for + * a dqreclaim. + */ + if ((dqp->dq_flags & dqtype) == 0) + continue; + + if (!mutex_trylock(&dqp->q_hash->qh_lock)) { + nrecl = q->qi_dqreclaims; + mutex_unlock(&q->qi_dqlist_lock); + mutex_lock(&dqp->q_hash->qh_lock); + mutex_lock(&q->qi_dqlist_lock); + + /* + * XXXTheoretically, we can get into a very long + * ping pong game here. + * No one can be adding dquots to the mplist at + * this point, but somebody might be taking things off. + */ + if (nrecl != q->qi_dqreclaims) { + mutex_unlock(&dqp->q_hash->qh_lock); + goto again; + } + } + + /* + * Take the dquot off the mplist and hashlist. It may remain on + * freelist in INACTIVE state. + */ + nmisses += xfs_qm_dqpurge(dqp); + } + mutex_unlock(&q->qi_dqlist_lock); + return nmisses; +} + +int +xfs_qm_dqpurge_all( + xfs_mount_t *mp, + uint flags) +{ + int ndquots; + + /* + * Purge the dquot cache. + * None of the dquots should really be busy at this point. + */ + if (mp->m_quotainfo) { + while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) { + delay(ndquots * 10); + } + } + return 0; +} + +STATIC int +xfs_qm_dqattach_one( + xfs_inode_t *ip, + xfs_dqid_t id, + uint type, + uint doalloc, + xfs_dquot_t *udqhint, /* hint */ + xfs_dquot_t **IO_idqpp) +{ + xfs_dquot_t *dqp; + int error; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + error = 0; + + /* + * See if we already have it in the inode itself. IO_idqpp is + * &i_udquot or &i_gdquot. This made the code look weird, but + * made the logic a lot simpler. + */ + dqp = *IO_idqpp; + if (dqp) { + trace_xfs_dqattach_found(dqp); + return 0; + } + + /* + * udqhint is the i_udquot field in inode, and is non-NULL only + * when the type arg is group/project. Its purpose is to save a + * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside + * the user dquot. + */ + if (udqhint) { + ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); + xfs_dqlock(udqhint); + + /* + * No need to take dqlock to look at the id. + * + * The ID can't change until it gets reclaimed, and it won't + * be reclaimed as long as we have a ref from inode and we + * hold the ilock. + */ + dqp = udqhint->q_gdquot; + if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { + xfs_dqlock(dqp); + XFS_DQHOLD(dqp); + ASSERT(*IO_idqpp == NULL); + *IO_idqpp = dqp; + + xfs_dqunlock(dqp); + xfs_dqunlock(udqhint); + return 0; + } + + /* + * We can't hold a dquot lock when we call the dqget code. + * We'll deadlock in no time, because of (not conforming to) + * lock ordering - the inodelock comes before any dquot lock, + * and we may drop and reacquire the ilock in xfs_qm_dqget(). + */ + xfs_dqunlock(udqhint); + } + + /* + * Find the dquot from somewhere. This bumps the + * reference count of dquot and returns it locked. + * This can return ENOENT if dquot didn't exist on + * disk and we didn't ask it to allocate; + * ESRCH if quotas got turned off suddenly. + */ + error = xfs_qm_dqget(ip->i_mount, ip, id, type, XFS_QMOPT_DOWARN, &dqp); + if (error) + return error; + + trace_xfs_dqattach_get(dqp); + + /* + * dqget may have dropped and re-acquired the ilock, but it guarantees + * that the dquot returned is the one that should go in the inode. + */ + *IO_idqpp = dqp; + xfs_dqunlock(dqp); + return 0; +} + + +/* + * Given a udquot and gdquot, attach a ptr to the group dquot in the + * udquot as a hint for future lookups. The idea sounds simple, but the + * execution isn't, because the udquot might have a group dquot attached + * already and getting rid of that gets us into lock ordering constraints. + * The process is complicated more by the fact that the dquots may or may not + * be locked on entry. + */ +STATIC void +xfs_qm_dqattach_grouphint( + xfs_dquot_t *udq, + xfs_dquot_t *gdq) +{ + xfs_dquot_t *tmp; + + xfs_dqlock(udq); + + if ((tmp = udq->q_gdquot)) { + if (tmp == gdq) { + xfs_dqunlock(udq); + return; + } + + udq->q_gdquot = NULL; + /* + * We can't keep any dqlocks when calling dqrele, + * because the freelist lock comes before dqlocks. + */ + xfs_dqunlock(udq); + /* + * we took a hard reference once upon a time in dqget, + * so give it back when the udquot no longer points at it + * dqput() does the unlocking of the dquot. + */ + xfs_qm_dqrele(tmp); + + xfs_dqlock(udq); + xfs_dqlock(gdq); + + } else { + ASSERT(XFS_DQ_IS_LOCKED(udq)); + xfs_dqlock(gdq); + } + + ASSERT(XFS_DQ_IS_LOCKED(udq)); + ASSERT(XFS_DQ_IS_LOCKED(gdq)); + /* + * Somebody could have attached a gdquot here, + * when we dropped the uqlock. If so, just do nothing. + */ + if (udq->q_gdquot == NULL) { + XFS_DQHOLD(gdq); + udq->q_gdquot = gdq; + } + + xfs_dqunlock(gdq); + xfs_dqunlock(udq); +} + + +/* + * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON + * into account. + * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed. + * Inode may get unlocked and relocked in here, and the caller must deal with + * the consequences. + */ +int +xfs_qm_dqattach_locked( + xfs_inode_t *ip, + uint flags) +{ + xfs_mount_t *mp = ip->i_mount; + uint nquotas = 0; + int error = 0; + + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + !XFS_NOT_DQATTACHED(mp, ip) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) + return 0; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + if (XFS_IS_UQUOTA_ON(mp)) { + error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER, + flags & XFS_QMOPT_DQALLOC, + NULL, &ip->i_udquot); + if (error) + goto done; + nquotas++; + } + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (XFS_IS_OQUOTA_ON(mp)) { + error = XFS_IS_GQUOTA_ON(mp) ? + xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, + flags & XFS_QMOPT_DQALLOC, + ip->i_udquot, &ip->i_gdquot) : + xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, + flags & XFS_QMOPT_DQALLOC, + ip->i_udquot, &ip->i_gdquot); + /* + * Don't worry about the udquot that we may have + * attached above. It'll get detached, if not already. + */ + if (error) + goto done; + nquotas++; + } + + /* + * Attach this group quota to the user quota as a hint. + * This WON'T, in general, result in a thrash. + */ + if (nquotas == 2) { + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(ip->i_udquot); + ASSERT(ip->i_gdquot); + + /* + * We may or may not have the i_udquot locked at this point, + * but this check is OK since we don't depend on the i_gdquot to + * be accurate 100% all the time. It is just a hint, and this + * will succeed in general. + */ + if (ip->i_udquot->q_gdquot == ip->i_gdquot) + goto done; + /* + * Attach i_gdquot to the gdquot hint inside the i_udquot. + */ + xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot); + } + + done: +#ifdef DEBUG + if (!error) { + if (XFS_IS_UQUOTA_ON(mp)) + ASSERT(ip->i_udquot); + if (XFS_IS_OQUOTA_ON(mp)) + ASSERT(ip->i_gdquot); + } + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +#endif + return error; +} + +int +xfs_qm_dqattach( + struct xfs_inode *ip, + uint flags) +{ + int error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_qm_dqattach_locked(ip, flags); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + return error; +} + +/* + * Release dquots (and their references) if any. + * The inode should be locked EXCL except when this's called by + * xfs_ireclaim. + */ +void +xfs_qm_dqdetach( + xfs_inode_t *ip) +{ + if (!(ip->i_udquot || ip->i_gdquot)) + return; + + trace_xfs_dquot_dqdetach(ip); + + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino); + ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino); + if (ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } +} + +int +xfs_qm_sync( + struct xfs_mount *mp, + int flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + int recl, restarts; + struct xfs_dquot *dqp; + int error; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + restarts = 0; + + again: + mutex_lock(&q->qi_dqlist_lock); + /* + * dqpurge_all() also takes the mplist lock and iterate thru all dquots + * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared + * when we have the mplist lock, we know that dquots will be consistent + * as long as we have it locked. + */ + if (!XFS_IS_QUOTA_ON(mp)) { + mutex_unlock(&q->qi_dqlist_lock); + return 0; + } + ASSERT(mutex_is_locked(&q->qi_dqlist_lock)); + list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) { + /* + * If this is vfs_sync calling, then skip the dquots that + * don't 'seem' to be dirty. ie. don't acquire dqlock. + * This is very similar to what xfs_sync does with inodes. + */ + if (flags & SYNC_TRYLOCK) { + if (!XFS_DQ_IS_DIRTY(dqp)) + continue; + if (!xfs_qm_dqlock_nowait(dqp)) + continue; + } else { + xfs_dqlock(dqp); + } + + /* + * Now, find out for sure if this dquot is dirty or not. + */ + if (! XFS_DQ_IS_DIRTY(dqp)) { + xfs_dqunlock(dqp); + continue; + } + + /* XXX a sentinel would be better */ + recl = q->qi_dqreclaims; + if (!xfs_dqflock_nowait(dqp)) { + if (flags & SYNC_TRYLOCK) { + xfs_dqunlock(dqp); + continue; + } + /* + * If we can't grab the flush lock then if the caller + * really wanted us to give this our best shot, so + * see if we can give a push to the buffer before we wait + * on the flush lock. At this point, we know that + * even though the dquot is being flushed, + * it has (new) dirty data. + */ + xfs_qm_dqflock_pushbuf_wait(dqp); + } + /* + * Let go of the mplist lock. We don't want to hold it + * across a disk write + */ + mutex_unlock(&q->qi_dqlist_lock); + error = xfs_qm_dqflush(dqp, flags); + xfs_dqunlock(dqp); + if (error && XFS_FORCED_SHUTDOWN(mp)) + return 0; /* Need to prevent umount failure */ + else if (error) + return error; + + mutex_lock(&q->qi_dqlist_lock); + if (recl != q->qi_dqreclaims) { + if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS) + break; + + mutex_unlock(&q->qi_dqlist_lock); + goto again; + } + } + + mutex_unlock(&q->qi_dqlist_lock); + return 0; +} + +/* + * The hash chains and the mplist use the same xfs_dqhash structure as + * their list head, but we can take the mplist qh_lock and one of the + * hash qh_locks at the same time without any problem as they aren't + * related. + */ +static struct lock_class_key xfs_quota_mplist_class; + +/* + * This initializes all the quota information that's kept in the + * mount structure + */ +STATIC int +xfs_qm_init_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qinf; + int error; + xfs_dquot_t *dqp; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * Tell XQM that we exist as soon as possible. + */ + if ((error = xfs_qm_hold_quotafs_ref(mp))) { + return error; + } + + qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); + + /* + * See if quotainodes are setup, and if not, allocate them, + * and change the superblock accordingly. + */ + if ((error = xfs_qm_init_quotainos(mp))) { + kmem_free(qinf); + mp->m_quotainfo = NULL; + return error; + } + + INIT_LIST_HEAD(&qinf->qi_dqlist); + mutex_init(&qinf->qi_dqlist_lock); + lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class); + + qinf->qi_dqreclaims = 0; + + /* mutex used to serialize quotaoffs */ + mutex_init(&qinf->qi_quotaofflock); + + /* Precalc some constants */ + qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + ASSERT(qinf->qi_dqchunklen); + qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen); + do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t)); + + mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); + + /* + * We try to get the limits from the superuser's limits fields. + * This is quite hacky, but it is standard quota practice. + * We look at the USR dquot with id == 0 first, but if user quotas + * are not enabled we goto the GRP dquot with id == 0. + * We don't really care to keep separate default limits for user + * and group quotas, at least not at this point. + */ + error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0, + XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : + (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : + XFS_DQ_PROJ), + XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN, + &dqp); + if (! error) { + xfs_disk_dquot_t *ddqp = &dqp->q_core; + + /* + * The warnings and timers set the grace period given to + * a user or group before he or she can not perform any + * more writing. If it is zero, a default is used. + */ + qinf->qi_btimelimit = ddqp->d_btimer ? + be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = ddqp->d_itimer ? + be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ? + be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = ddqp->d_bwarns ? + be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = ddqp->d_iwarns ? + be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? + be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; + qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); + qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); + qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); + qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); + qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); + qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); + + /* + * We sent the XFS_QMOPT_DQSUSER flag to dqget because + * we don't want this dquot cached. We haven't done a + * quotacheck yet, and quotacheck doesn't like incore dquots. + */ + xfs_qm_dqdestroy(dqp); + } else { + qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; + qinf->qi_itimelimit = XFS_QM_ITIMELIMIT; + qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT; + qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT; + qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT; + qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; + } + + return 0; +} + + +/* + * Gets called when unmounting a filesystem or when all quotas get + * turned off. + * This purges the quota inodes, destroys locks and frees itself. + */ +void +xfs_qm_destroy_quotainfo( + xfs_mount_t *mp) +{ + xfs_quotainfo_t *qi; + + qi = mp->m_quotainfo; + ASSERT(qi != NULL); + ASSERT(xfs_Gqm != NULL); + + /* + * Release the reference that XQM kept, so that we know + * when the XQM structure should be freed. We cannot assume + * that xfs_Gqm is non-null after this point. + */ + xfs_qm_rele_quotafs_ref(mp); + + ASSERT(list_empty(&qi->qi_dqlist)); + mutex_destroy(&qi->qi_dqlist_lock); + + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + mutex_destroy(&qi->qi_quotaofflock); + kmem_free(qi); + mp->m_quotainfo = NULL; +} + + + +/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */ + +/* ARGSUSED */ +STATIC void +xfs_qm_list_init( + xfs_dqlist_t *list, + char *str, + int n) +{ + mutex_init(&list->qh_lock); + INIT_LIST_HEAD(&list->qh_list); + list->qh_version = 0; + list->qh_nelems = 0; +} + +STATIC void +xfs_qm_list_destroy( + xfs_dqlist_t *list) +{ + mutex_destroy(&(list->qh_lock)); +} + +/* + * Create an inode and return with a reference already taken, but unlocked + * This is how we create quota inodes + */ +STATIC int +xfs_qm_qino_alloc( + xfs_mount_t *mp, + xfs_inode_t **ip, + __int64_t sbfields, + uint flags) +{ + xfs_trans_t *tp; + int error; + int committed; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE); + if ((error = xfs_trans_reserve(tp, + XFS_QM_QINOCREATE_SPACE_RES(mp), + XFS_CREATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_CREATE_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + return error; + } + + /* + * Make the changes in the superblock, and log those too. + * sbfields arg may contain fields other than *QUOTINO; + * VERSIONNUM for example. + */ + spin_lock(&mp->m_sb_lock); + if (flags & XFS_QMOPT_SBVERSION) { + ASSERT(!xfs_sb_version_hasquota(&mp->m_sb)); + ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) == + (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS)); + + xfs_sb_version_addquota(&mp->m_sb); + mp->m_sb.sb_uquotino = NULLFSINO; + mp->m_sb.sb_gquotino = NULLFSINO; + + /* qflags will get updated _after_ quotacheck */ + mp->m_sb.sb_qflags = 0; + } + if (flags & XFS_QMOPT_UQUOTA) + mp->m_sb.sb_uquotino = (*ip)->i_ino; + else + mp->m_sb.sb_gquotino = (*ip)->i_ino; + spin_unlock(&mp->m_sb_lock); + xfs_mod_sb(tp, sbfields); + + if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) { + xfs_alert(mp, "%s failed (error %d)!", __func__, error); + return error; + } + return 0; +} + + +STATIC void +xfs_qm_reset_dqcounts( + xfs_mount_t *mp, + xfs_buf_t *bp, + xfs_dqid_t id, + uint type) +{ + xfs_disk_dquot_t *ddq; + int j; + + trace_xfs_reset_dqcounts(bp, _RET_IP_); + + /* + * Reset all counters and timers. They'll be + * started afresh by xfs_qm_quotacheck. + */ +#ifdef DEBUG + j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); + do_div(j, sizeof(xfs_dqblk_t)); + ASSERT(mp->m_quotainfo->qi_dqperchunk == j); +#endif + ddq = bp->b_addr; + for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) { + /* + * Do a sanity check, and if needed, repair the dqblk. Don't + * output any warnings because it's perfectly possible to + * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. + */ + (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, + "xfs_quotacheck"); + ddq->d_bcount = 0; + ddq->d_icount = 0; + ddq->d_rtbcount = 0; + ddq->d_btimer = 0; + ddq->d_itimer = 0; + ddq->d_rtbtimer = 0; + ddq->d_bwarns = 0; + ddq->d_iwarns = 0; + ddq->d_rtbwarns = 0; + ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1); + } +} + +STATIC int +xfs_qm_dqiter_bufs( + xfs_mount_t *mp, + xfs_dqid_t firstid, + xfs_fsblock_t bno, + xfs_filblks_t blkcnt, + uint flags) +{ + xfs_buf_t *bp; + int error; + int type; + + ASSERT(blkcnt > 0); + type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER : + (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP); + error = 0; + + /* + * Blkcnt arg can be a very big number, and might even be + * larger than the log itself. So, we have to break it up into + * manageable-sized transactions. + * Note that we don't start a permanent transaction here; we might + * not be able to get a log reservation for the whole thing up front, + * and we don't really care to either, because we just discard + * everything if we were to crash in the middle of this loop. + */ + while (blkcnt--) { + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, bno), + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) + break; + + xfs_qm_reset_dqcounts(mp, bp, firstid, type); + xfs_bdwrite(mp, bp); + /* + * goto the next block. + */ + bno++; + firstid += mp->m_quotainfo->qi_dqperchunk; + } + return error; +} + +/* + * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a + * caller supplied function for every chunk of dquots that we find. + */ +STATIC int +xfs_qm_dqiterate( + xfs_mount_t *mp, + xfs_inode_t *qip, + uint flags) +{ + xfs_bmbt_irec_t *map; + int i, nmaps; /* number of map entries */ + int error; /* return value */ + xfs_fileoff_t lblkno; + xfs_filblks_t maxlblkcnt; + xfs_dqid_t firstid; + xfs_fsblock_t rablkno; + xfs_filblks_t rablkcnt; + + error = 0; + /* + * This looks racy, but we can't keep an inode lock across a + * trans_reserve. But, this gets called during quotacheck, and that + * happens only at mount time which is single threaded. + */ + if (qip->i_d.di_nblocks == 0) + return 0; + + map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP); + + lblkno = 0; + maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); + do { + nmaps = XFS_DQITER_MAP_SIZE; + /* + * We aren't changing the inode itself. Just changing + * some of its data. No new blocks are added here, and + * the inode is never added to the transaction. + */ + xfs_ilock(qip, XFS_ILOCK_SHARED); + error = xfs_bmapi(NULL, qip, lblkno, + maxlblkcnt - lblkno, + XFS_BMAPI_METADATA, + NULL, + 0, map, &nmaps, NULL); + xfs_iunlock(qip, XFS_ILOCK_SHARED); + if (error) + break; + + ASSERT(nmaps <= XFS_DQITER_MAP_SIZE); + for (i = 0; i < nmaps; i++) { + ASSERT(map[i].br_startblock != DELAYSTARTBLOCK); + ASSERT(map[i].br_blockcount); + + + lblkno += map[i].br_blockcount; + + if (map[i].br_startblock == HOLESTARTBLOCK) + continue; + + firstid = (xfs_dqid_t) map[i].br_startoff * + mp->m_quotainfo->qi_dqperchunk; + /* + * Do a read-ahead on the next extent. + */ + if ((i+1 < nmaps) && + (map[i+1].br_startblock != HOLESTARTBLOCK)) { + rablkcnt = map[i+1].br_blockcount; + rablkno = map[i+1].br_startblock; + while (rablkcnt--) { + xfs_buf_readahead(mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, rablkno), + mp->m_quotainfo->qi_dqchunklen); + rablkno++; + } + } + /* + * Iterate thru all the blks in the extent and + * reset the counters of all the dquots inside them. + */ + if ((error = xfs_qm_dqiter_bufs(mp, + firstid, + map[i].br_startblock, + map[i].br_blockcount, + flags))) { + break; + } + } + + if (error) + break; + } while (nmaps > 0); + + kmem_free(map); + + return error; +} + +/* + * Called by dqusage_adjust in doing a quotacheck. + * + * Given the inode, and a dquot id this updates both the incore dqout as well + * as the buffer copy. This is so that once the quotacheck is done, we can + * just log all the buffers, as opposed to logging numerous updates to + * individual dquots. + */ +STATIC int +xfs_qm_quotacheck_dqadjust( + struct xfs_inode *ip, + xfs_dqid_t id, + uint type, + xfs_qcnt_t nblks, + xfs_qcnt_t rtblks) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *dqp; + int error; + + error = xfs_qm_dqget(mp, ip, id, type, + XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); + if (error) { + /* + * Shouldn't be able to turn off quotas here. + */ + ASSERT(error != ESRCH); + ASSERT(error != ENOENT); + return error; + } + + trace_xfs_dqadjust(dqp); + + /* + * Adjust the inode count and the block count to reflect this inode's + * resource usage. + */ + be64_add_cpu(&dqp->q_core.d_icount, 1); + dqp->q_res_icount++; + if (nblks) { + be64_add_cpu(&dqp->q_core.d_bcount, nblks); + dqp->q_res_bcount += nblks; + } + if (rtblks) { + be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks); + dqp->q_res_rtbcount += rtblks; + } + + /* + * Set default limits, adjust timers (since we changed usages) + * + * There are no timers for the default values set in the root dquot. + */ + if (dqp->q_core.d_id) { + xfs_qm_adjust_dqlimits(mp, &dqp->q_core); + xfs_qm_adjust_dqtimers(mp, &dqp->q_core); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_qm_dqput(dqp); + return 0; +} + +STATIC int +xfs_qm_get_rtblks( + xfs_inode_t *ip, + xfs_qcnt_t *O_rtblks) +{ + xfs_filblks_t rtblks; /* total rt blks */ + xfs_extnum_t idx; /* extent record index */ + xfs_ifork_t *ifp; /* inode fork pointer */ + xfs_extnum_t nextents; /* number of extent entries */ + int error; + + ASSERT(XFS_IS_REALTIME_INODE(ip)); + ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); + if (!(ifp->if_flags & XFS_IFEXTENTS)) { + if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK))) + return error; + } + rtblks = 0; + nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); + for (idx = 0; idx < nextents; idx++) + rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx)); + *O_rtblks = (xfs_qcnt_t)rtblks; + return 0; +} + +/* + * callback routine supplied to bulkstat(). Given an inumber, find its + * dquots and update them to account for resources taken by that inode. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_dqusage_adjust( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* not used */ + int ubsize, /* not used */ + int *ubused, /* not used */ + int *res) /* result code value */ +{ + xfs_inode_t *ip; + xfs_qcnt_t nblks, rtblks = 0; + int error; + + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * rootino must have its resources accounted for, not so with the quota + * inodes. + */ + if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) { + *res = BULKSTAT_RV_NOTHING; + return XFS_ERROR(EINVAL); + } + + /* + * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget + * interface expects the inode to be exclusively locked because that's + * the case in all other instances. It's OK that we do this because + * quotacheck is done only at mount time. + */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); + if (error) { + *res = BULKSTAT_RV_NOTHING; + return error; + } + + ASSERT(ip->i_delayed_blks == 0); + + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * Walk thru the extent list and count the realtime blocks. + */ + error = xfs_qm_get_rtblks(ip, &rtblks); + if (error) + goto error0; + } + + nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; + + /* + * Add the (disk blocks and inode) resources occupied by this + * inode to its dquots. We do this adjustment in the incore dquot, + * and also copy the changes to its buffer. + * We don't care about putting these changes in a transaction + * envelope because if we crash in the middle of a 'quotacheck' + * we have to start from the beginning anyway. + * Once we're done, we'll log all the dquot bufs. + * + * The *QUOTA_ON checks below may look pretty racy, but quotachecks + * and quotaoffs don't race. (Quotachecks happen at mount time only). + */ + if (XFS_IS_UQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, + XFS_DQ_USER, nblks, rtblks); + if (error) + goto error0; + } + + if (XFS_IS_GQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, + XFS_DQ_GROUP, nblks, rtblks); + if (error) + goto error0; + } + + if (XFS_IS_PQUOTA_ON(mp)) { + error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), + XFS_DQ_PROJ, nblks, rtblks); + if (error) + goto error0; + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_DIDONE; + return 0; + +error0: + xfs_iunlock(ip, XFS_ILOCK_EXCL); + IRELE(ip); + *res = BULKSTAT_RV_GIVEUP; + return error; +} + +/* + * Walk thru all the filesystem inodes and construct a consistent view + * of the disk quota world. If the quotacheck fails, disable quotas. + */ +int +xfs_qm_quotacheck( + xfs_mount_t *mp) +{ + int done, count, error; + xfs_ino_t lastino; + size_t structsz; + xfs_inode_t *uip, *gip; + uint flags; + + count = INT_MAX; + structsz = 1; + lastino = 0; + flags = 0; + + ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + /* + * There should be no cached dquots. The (simplistic) quotacheck + * algorithm doesn't like that. + */ + ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist)); + + xfs_notice(mp, "Quotacheck needed: Please wait."); + + /* + * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset + * their counters to zero. We need a clean slate. + * We don't log our changes till later. + */ + uip = mp->m_quotainfo->qi_uquotaip; + if (uip) { + error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA); + if (error) + goto error_return; + flags |= XFS_UQUOTA_CHKD; + } + + gip = mp->m_quotainfo->qi_gquotaip; + if (gip) { + error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + if (error) + goto error_return; + flags |= XFS_OQUOTA_CHKD; + } + + do { + /* + * Iterate thru all the inodes in the file system, + * adjusting the corresponding dquot counters in core. + */ + error = xfs_bulkstat(mp, &lastino, &count, + xfs_qm_dqusage_adjust, + structsz, NULL, &done); + if (error) + break; + + } while (!done); + + /* + * We've made all the changes that we need to make incore. + * Flush them down to disk buffers if everything was updated + * successfully. + */ + if (!error) + error = xfs_qm_dqflush_all(mp, 0); + + /* + * We can get this error if we couldn't do a dquot allocation inside + * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the + * dirty dquots that might be cached, we just want to get rid of them + * and turn quotaoff. The dquots won't be attached to any of the inodes + * at this point (because we intentionally didn't in dqget_noattach). + */ + if (error) { + xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL); + goto error_return; + } + + /* + * We didn't log anything, because if we crashed, we'll have to + * start the quotacheck from scratch anyway. However, we must make + * sure that our dquot changes are secure before we put the + * quotacheck'd stamp on the superblock. So, here we do a synchronous + * flush. + */ + XFS_bflush(mp->m_ddev_targp); + + /* + * If one type of quotas is off, then it will lose its + * quotachecked status, since we won't be doing accounting for + * that type anymore. + */ + mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); + mp->m_qflags |= flags; + + error_return: + if (error) { + xfs_warn(mp, + "Quotacheck: Unsuccessful (Error %d): Disabling quotas.", + error); + /* + * We must turn off quotas. + */ + ASSERT(mp->m_quotainfo != NULL); + ASSERT(xfs_Gqm != NULL); + xfs_qm_destroy_quotainfo(mp); + if (xfs_mount_reset_sbqflags(mp)) { + xfs_warn(mp, + "Quotacheck: Failed to reset quota flags."); + } + } else + xfs_notice(mp, "Quotacheck: Done."); + return (error); +} + +/* + * This is called after the superblock has been read in and we're ready to + * iget the quota inodes. + */ +STATIC int +xfs_qm_init_quotainos( + xfs_mount_t *mp) +{ + xfs_inode_t *uip, *gip; + int error; + __int64_t sbflags; + uint flags; + + ASSERT(mp->m_quotainfo); + uip = gip = NULL; + sbflags = 0; + flags = 0; + + /* + * Get the uquota and gquota inodes + */ + if (xfs_sb_version_hasquota(&mp->m_sb)) { + if (XFS_IS_UQUOTA_ON(mp) && + mp->m_sb.sb_uquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_uquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip))) + return XFS_ERROR(error); + } + if (XFS_IS_OQUOTA_ON(mp) && + mp->m_sb.sb_gquotino != NULLFSINO) { + ASSERT(mp->m_sb.sb_gquotino > 0); + if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip))) { + if (uip) + IRELE(uip); + return XFS_ERROR(error); + } + } + } else { + flags |= XFS_QMOPT_SBVERSION; + sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | + XFS_SB_GQUOTINO | XFS_SB_QFLAGS); + } + + /* + * Create the two inodes, if they don't exist already. The changes + * made above will get added to a transaction and logged in one of + * the qino_alloc calls below. If the device is readonly, + * temporarily switch to read-write to do this. + */ + if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) { + if ((error = xfs_qm_qino_alloc(mp, &uip, + sbflags | XFS_SB_UQUOTINO, + flags | XFS_QMOPT_UQUOTA))) + return XFS_ERROR(error); + + flags &= ~XFS_QMOPT_SBVERSION; + } + if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) { + flags |= (XFS_IS_GQUOTA_ON(mp) ? + XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA); + error = xfs_qm_qino_alloc(mp, &gip, + sbflags | XFS_SB_GQUOTINO, flags); + if (error) { + if (uip) + IRELE(uip); + + return XFS_ERROR(error); + } + } + + mp->m_quotainfo->qi_uquotaip = uip; + mp->m_quotainfo->qi_gquotaip = gip; + + return 0; +} + + + +/* + * Just pop the least recently used dquot off the freelist and + * recycle it. The returned dquot is locked. + */ +STATIC xfs_dquot_t * +xfs_qm_dqreclaim_one(void) +{ + xfs_dquot_t *dqpout; + xfs_dquot_t *dqp; + int restarts; + int startagain; + + restarts = 0; + dqpout = NULL; + + /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */ +again: + startagain = 0; + mutex_lock(&xfs_Gqm->qm_dqfrlist_lock); + + list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) { + struct xfs_mount *mp = dqp->q_mount; + xfs_dqlock(dqp); + + /* + * We are racing with dqlookup here. Naturally we don't + * want to reclaim a dquot that lookup wants. We release the + * freelist lock and start over, so that lookup will grab + * both the dquot and the freelistlock. + */ + if (dqp->dq_flags & XFS_DQ_WANT) { + ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE)); + + trace_xfs_dqreclaim_want(dqp); + XQM_STATS_INC(xqmstats.xs_qm_dqwants); + restarts++; + startagain = 1; + goto dqunlock; + } + + /* + * If the dquot is inactive, we are assured that it is + * not on the mplist or the hashlist, and that makes our + * life easier. + */ + if (dqp->dq_flags & XFS_DQ_INACTIVE) { + ASSERT(mp == NULL); + ASSERT(! XFS_DQ_IS_DIRTY(dqp)); + ASSERT(list_empty(&dqp->q_hashlist)); + ASSERT(list_empty(&dqp->q_mplist)); + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + dqpout = dqp; + XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims); + goto dqunlock; + } + + ASSERT(dqp->q_hash); + ASSERT(!list_empty(&dqp->q_mplist)); + + /* + * Try to grab the flush lock. If this dquot is in the process + * of getting flushed to disk, we don't want to reclaim it. + */ + if (!xfs_dqflock_nowait(dqp)) + goto dqunlock; + + /* + * We have the flush lock so we know that this is not in the + * process of being flushed. So, if this is dirty, flush it + * DELWRI so that we don't get a freelist infested with + * dirty dquots. + */ + if (XFS_DQ_IS_DIRTY(dqp)) { + int error; + + trace_xfs_dqreclaim_dirty(dqp); + + /* + * We flush it delayed write, so don't bother + * releasing the freelist lock. + */ + error = xfs_qm_dqflush(dqp, 0); + if (error) { + xfs_warn(mp, "%s: dquot %p flush failed", + __func__, dqp); + } + goto dqunlock; + } + + /* + * We're trying to get the hashlock out of order. This races + * with dqlookup; so, we giveup and goto the next dquot if + * we couldn't get the hashlock. This way, we won't starve + * a dqlookup process that holds the hashlock that is + * waiting for the freelist lock. + */ + if (!mutex_trylock(&dqp->q_hash->qh_lock)) { + restarts++; + goto dqfunlock; + } + + /* + * This races with dquot allocation code as well as dqflush_all + * and reclaim code. So, if we failed to grab the mplist lock, + * giveup everything and start over. + */ + if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) { + restarts++; + startagain = 1; + goto qhunlock; + } + + ASSERT(dqp->q_nrefs == 0); + list_del_init(&dqp->q_mplist); + mp->m_quotainfo->qi_dquots--; + mp->m_quotainfo->qi_dqreclaims++; + list_del_init(&dqp->q_hashlist); + dqp->q_hash->qh_version++; + list_del_init(&dqp->q_freelist); + xfs_Gqm->qm_dqfrlist_cnt--; + dqpout = dqp; + mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock); +qhunlock: + mutex_unlock(&dqp->q_hash->qh_lock); +dqfunlock: + xfs_dqfunlock(dqp); +dqunlock: + xfs_dqunlock(dqp); + if (dqpout) + break; + if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS) + break; + if (startagain) { + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + goto again; + } + } + mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock); + return dqpout; +} + +/* + * Traverse the freelist of dquots and attempt to reclaim a maximum of + * 'howmany' dquots. This operation races with dqlookup(), and attempts to + * favor the lookup function ... + */ +STATIC int +xfs_qm_shake_freelist( + int howmany) +{ + int nreclaimed = 0; + xfs_dquot_t *dqp; + + if (howmany <= 0) + return 0; + + while (nreclaimed < howmany) { + dqp = xfs_qm_dqreclaim_one(); + if (!dqp) + return nreclaimed; + xfs_qm_dqdestroy(dqp); + nreclaimed++; + } + return nreclaimed; +} + +/* + * The kmem_shake interface is invoked when memory is running low. + */ +/* ARGSUSED */ +STATIC int +xfs_qm_shake( + struct shrinker *shrink, + struct shrink_control *sc) +{ + int ndqused, nfree, n; + gfp_t gfp_mask = sc->gfp_mask; + + if (!kmem_shake_allow(gfp_mask)) + return 0; + if (!xfs_Gqm) + return 0; + + nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */ + /* incore dquots in all f/s's */ + ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree; + + ASSERT(ndqused >= 0); + + if (nfree <= ndqused && nfree < ndquot) + return 0; + + ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */ + n = nfree - ndqused - ndquot; /* # over target */ + + return xfs_qm_shake_freelist(MAX(nfree, n)); +} + + +/*------------------------------------------------------------------*/ + +/* + * Return a new incore dquot. Depending on the number of + * dquots in the system, we either allocate a new one on the kernel heap, + * or reclaim a free one. + * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed + * to reclaim an existing one from the freelist. + */ +boolean_t +xfs_qm_dqalloc_incore( + xfs_dquot_t **O_dqpp) +{ + xfs_dquot_t *dqp; + + /* + * Check against high water mark to see if we want to pop + * a nincompoop dquot off the freelist. + */ + if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) { + /* + * Try to recycle a dquot from the freelist. + */ + if ((dqp = xfs_qm_dqreclaim_one())) { + XQM_STATS_INC(xqmstats.xs_qm_dqreclaims); + /* + * Just zero the core here. The rest will get + * reinitialized by caller. XXX we shouldn't even + * do this zero ... + */ + memset(&dqp->q_core, 0, sizeof(dqp->q_core)); + *O_dqpp = dqp; + return B_FALSE; + } + XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses); + } + + /* + * Allocate a brand new dquot on the kernel heap and return it + * to the caller to initialize. + */ + ASSERT(xfs_Gqm->qm_dqzone != NULL); + *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP); + atomic_inc(&xfs_Gqm->qm_totaldquots); + + return B_TRUE; +} + + +/* + * Start a transaction and write the incore superblock changes to + * disk. flags parameter indicates which fields have changed. + */ +int +xfs_qm_write_sb_changes( + xfs_mount_t *mp, + __int64_t flags) +{ + xfs_trans_t *tp; + int error; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); + if ((error = xfs_trans_reserve(tp, 0, + mp->m_sb.sb_sectsize + 128, 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_mod_sb(tp, flags); + error = xfs_trans_commit(tp, 0); + + return error; +} + + +/* --------------- utility functions for vnodeops ---------------- */ + + +/* + * Given an inode, a uid, gid and prid make sure that we have + * allocated relevant dquot(s) on disk, and that we won't exceed inode + * quotas by creating this file. + * This also attaches dquot(s) to the given inode after locking it, + * and returns the dquots corresponding to the uid and/or gid. + * + * in : inode (unlocked) + * out : udquot, gdquot with references taken and unlocked + */ +int +xfs_qm_vop_dqalloc( + struct xfs_inode *ip, + uid_t uid, + gid_t gid, + prid_t prid, + uint flags, + struct xfs_dquot **O_udqpp, + struct xfs_dquot **O_gdqpp) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_dquot *uq, *gq; + int error; + uint lockflags; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + lockflags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lockflags); + + if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip)) + gid = ip->i_d.di_gid; + + /* + * Attach the dquot(s) to this inode, doing a dquot allocation + * if necessary. The dquot(s) will not be locked. + */ + if (XFS_NOT_DQATTACHED(mp, ip)) { + error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC); + if (error) { + xfs_iunlock(ip, lockflags); + return error; + } + } + + uq = gq = NULL; + if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) { + if (ip->i_d.di_uid != uid) { + /* + * What we need is the dquot that has this uid, and + * if we send the inode to dqget, the uid of the inode + * takes priority over what's sent in the uid argument. + * We must unlock inode here before calling dqget if + * we're not sending the inode, because otherwise + * we'll deadlock by doing trans_reserve while + * holding ilock. + */ + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid, + XFS_DQ_USER, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &uq))) { + ASSERT(error != ENOENT); + return error; + } + /* + * Get the ilock in the right order. + */ + xfs_dqunlock(uq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + /* + * Take an extra reference, because we'll return + * this to caller + */ + ASSERT(ip->i_udquot); + uq = ip->i_udquot; + xfs_dqlock(uq); + XFS_DQHOLD(uq); + xfs_dqunlock(uq); + } + } + if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) { + if (ip->i_d.di_gid != gid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid, + XFS_DQ_GROUP, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return error; + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { + if (xfs_get_projid(ip) != prid) { + xfs_iunlock(ip, lockflags); + if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, + XFS_DQ_PROJ, + XFS_QMOPT_DQALLOC | + XFS_QMOPT_DOWARN, + &gq))) { + if (uq) + xfs_qm_dqrele(uq); + ASSERT(error != ENOENT); + return (error); + } + xfs_dqunlock(gq); + lockflags = XFS_ILOCK_SHARED; + xfs_ilock(ip, lockflags); + } else { + ASSERT(ip->i_gdquot); + gq = ip->i_gdquot; + xfs_dqlock(gq); + XFS_DQHOLD(gq); + xfs_dqunlock(gq); + } + } + if (uq) + trace_xfs_dquot_dqalloc(ip); + + xfs_iunlock(ip, lockflags); + if (O_udqpp) + *O_udqpp = uq; + else if (uq) + xfs_qm_dqrele(uq); + if (O_gdqpp) + *O_gdqpp = gq; + else if (gq) + xfs_qm_dqrele(gq); + return 0; +} + +/* + * Actually transfer ownership, and do dquot modifications. + * These were already reserved. + */ +xfs_dquot_t * +xfs_qm_vop_chown( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t **IO_olddq, + xfs_dquot_t *newdq) +{ + xfs_dquot_t *prevdq; + uint bfield = XFS_IS_REALTIME_INODE(ip) ? + XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; + + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount)); + + /* old dquot */ + prevdq = *IO_olddq; + ASSERT(prevdq); + ASSERT(prevdq != newdq); + + xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks)); + xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); + + /* the sparkling new dquot */ + xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks); + xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); + + /* + * Take an extra reference, because the inode + * is going to keep this dquot pointer even + * after the trans_commit. + */ + xfs_dqlock(newdq); + XFS_DQHOLD(newdq); + xfs_dqunlock(newdq); + *IO_olddq = newdq; + + return prevdq; +} + +/* + * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID). + */ +int +xfs_qm_vop_chown_reserve( + xfs_trans_t *tp, + xfs_inode_t *ip, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + uint flags) +{ + xfs_mount_t *mp = ip->i_mount; + uint delblks, blkflags, prjflags = 0; + xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq; + int error; + + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + delblks = ip->i_delayed_blks; + delblksudq = delblksgdq = unresudq = unresgdq = NULL; + blkflags = XFS_IS_REALTIME_INODE(ip) ? + XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS; + + if (XFS_IS_UQUOTA_ON(mp) && udqp && + ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) { + delblksudq = udqp; + /* + * If there are delayed allocation blocks, then we have to + * unreserve those from the old dquot, and add them to the + * new dquot. + */ + if (delblks) { + ASSERT(ip->i_udquot); + unresudq = ip->i_udquot; + } + } + if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { + if (XFS_IS_PQUOTA_ON(ip->i_mount) && + xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) + prjflags = XFS_QMOPT_ENOSPC; + + if (prjflags || + (XFS_IS_GQUOTA_ON(ip->i_mount) && + ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) { + delblksgdq = gdqp; + if (delblks) { + ASSERT(ip->i_gdquot); + unresgdq = ip->i_gdquot; + } + } + } + + if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount, + delblksudq, delblksgdq, ip->i_d.di_nblocks, 1, + flags | blkflags | prjflags))) + return (error); + + /* + * Do the delayed blks reservations/unreservations now. Since, these + * are done without the help of a transaction, if a reservation fails + * its previous reservations won't be automatically undone by trans + * code. So, we have to do it manually here. + */ + if (delblks) { + /* + * Do the reservations first. Unreservation can't fail. + */ + ASSERT(delblksudq || delblksgdq); + ASSERT(unresudq || unresgdq); + if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0, + flags | blkflags | prjflags))) + return (error); + xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount, + unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0, + blkflags); + } + + return (0); +} + +int +xfs_qm_vop_rename_dqattach( + struct xfs_inode **i_tab) +{ + struct xfs_mount *mp = i_tab[0]->i_mount; + int i; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + for (i = 0; (i < 4 && i_tab[i]); i++) { + struct xfs_inode *ip = i_tab[i]; + int error; + + /* + * Watch out for duplicate entries in the table. + */ + if (i == 0 || ip != i_tab[i-1]) { + if (XFS_NOT_DQATTACHED(mp, ip)) { + error = xfs_qm_dqattach(ip, 0); + if (error) + return error; + } + } + } + return 0; +} + +void +xfs_qm_vop_create_dqattach( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *udqp, + struct xfs_dquot *gdqp) +{ + struct xfs_mount *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT(XFS_IS_QUOTA_RUNNING(mp)); + + if (udqp) { + xfs_dqlock(udqp); + XFS_DQHOLD(udqp); + xfs_dqunlock(udqp); + ASSERT(ip->i_udquot == NULL); + ip->i_udquot = udqp; + ASSERT(XFS_IS_UQUOTA_ON(mp)); + ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); + } + if (gdqp) { + xfs_dqlock(gdqp); + XFS_DQHOLD(gdqp); + xfs_dqunlock(gdqp); + ASSERT(ip->i_gdquot == NULL); + ip->i_gdquot = gdqp; + ASSERT(XFS_IS_OQUOTA_ON(mp)); + ASSERT((XFS_IS_GQUOTA_ON(mp) ? + ip->i_d.di_gid : xfs_get_projid(ip)) == + be32_to_cpu(gdqp->q_core.d_id)); + xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); + } +} + diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h new file mode 100644 index 000000000000..43b9abe1052c --- /dev/null +++ b/fs/xfs/xfs_qm.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_H__ +#define __XFS_QM_H__ + +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_quota_priv.h" +#include "xfs_qm_stats.h" + +struct xfs_qm; +struct xfs_inode; + +extern uint ndquot; +extern struct mutex xfs_Gqm_lock; +extern struct xfs_qm *xfs_Gqm; +extern kmem_zone_t *qm_dqzone; +extern kmem_zone_t *qm_dqtrxzone; + +/* + * Used in xfs_qm_sync called by xfs_sync to count the max times that it can + * iterate over the mountpt's dquot list in one call. + */ +#define XFS_QM_SYNC_MAX_RESTARTS 7 + +/* + * Ditto, for xfs_qm_dqreclaim_one. + */ +#define XFS_QM_RECLAIM_MAX_RESTARTS 4 + +/* + * Ideal ratio of free to in use dquots. Quota manager makes an attempt + * to keep this balance. + */ +#define XFS_QM_DQFREE_RATIO 2 + +/* + * Dquot hashtable constants/threshold values. + */ +#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t)) +#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t)) + +/* + * This defines the unit of allocation of dquots. + * Currently, it is just one file system block, and a 4K blk contains 30 + * (136 * 30 = 4080) dquots. It's probably not worth trying to make + * this more dynamic. + * XXXsup However, if this number is changed, we have to make sure that we don't + * implicitly assume that we do allocations in chunks of a single filesystem + * block in the dquot/xqm code. + */ +#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 + +typedef xfs_dqhash_t xfs_dqlist_t; + +/* + * Quota Manager (global) structure. Lives only in core. + */ +typedef struct xfs_qm { + xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */ + xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */ + uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */ + struct list_head qm_dqfrlist; /* freelist of dquots */ + struct mutex qm_dqfrlist_lock; + int qm_dqfrlist_cnt; + atomic_t qm_totaldquots; /* total incore dquots */ + uint qm_nrefs; /* file systems with quota on */ + int qm_dqfree_ratio;/* ratio of free to inuse dquots */ + kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */ + kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */ +} xfs_qm_t; + +/* + * Various quota information for individual filesystems. + * The mount structure keeps a pointer to this. + */ +typedef struct xfs_quotainfo { + xfs_inode_t *qi_uquotaip; /* user quota inode */ + xfs_inode_t *qi_gquotaip; /* group quota inode */ + struct list_head qi_dqlist; /* all dquots in filesys */ + struct mutex qi_dqlist_lock; + int qi_dquots; + int qi_dqreclaims; /* a change here indicates + a removal in the dqlist */ + time_t qi_btimelimit; /* limit for blks timer */ + time_t qi_itimelimit; /* limit for inodes timer */ + time_t qi_rtbtimelimit;/* limit for rt blks timer */ + xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */ + xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */ + xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */ + struct mutex qi_quotaofflock;/* to serialize quotaoff */ + xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ + uint qi_dqperchunk; /* # ondisk dqs in above chunk */ + xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ + xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ + xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ + xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ + xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ + xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ +} xfs_quotainfo_t; + + +extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long); +extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *, + xfs_dquot_t *, xfs_dquot_t *, long, long, uint); +extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *); +extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *); + +/* + * We keep the usr and grp dquots separately so that locking will be easier + * to do at commit time. All transactions that we know of at this point + * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value. + */ +#define XFS_QM_TRANS_MAXDQS 2 +typedef struct xfs_dquot_acct { + xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS]; + xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS]; +} xfs_dquot_acct_t; + +/* + * Users are allowed to have a usage exceeding their softlimit for + * a period this long. + */ +#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */ +#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */ + +#define XFS_QM_BWARNLIMIT 5 +#define XFS_QM_IWARNLIMIT 5 +#define XFS_QM_RTBWARNLIMIT 5 + +extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); +extern int xfs_qm_quotacheck(xfs_mount_t *); +extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t); + +/* dquot stuff */ +extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **); +extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint); +extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint); + +/* quota ops */ +extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint); +extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint, + fs_disk_quota_t *); +extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *); +extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); +extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); + +#endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c new file mode 100644 index 000000000000..a0a829addca9 --- /dev/null +++ b/fs/xfs/xfs_qm_bhv.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + + +STATIC void +xfs_fill_statvfs_from_dquot( + struct kstatfs *statp, + xfs_disk_dquot_t *dp) +{ + __uint64_t limit; + + limit = dp->d_blk_softlimit ? + be64_to_cpu(dp->d_blk_softlimit) : + be64_to_cpu(dp->d_blk_hardlimit); + if (limit && statp->f_blocks > limit) { + statp->f_blocks = limit; + statp->f_bfree = statp->f_bavail = + (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ? + (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0; + } + + limit = dp->d_ino_softlimit ? + be64_to_cpu(dp->d_ino_softlimit) : + be64_to_cpu(dp->d_ino_hardlimit); + if (limit && statp->f_files > limit) { + statp->f_files = limit; + statp->f_ffree = + (statp->f_files > be64_to_cpu(dp->d_icount)) ? + (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0; + } +} + + +/* + * Directory tree accounting is implemented using project quotas, where + * the project identifier is inherited from parent directories. + * A statvfs (df, etc.) of a directory that is using project quota should + * return a statvfs of the project, not the entire filesystem. + * This makes such trees appear as if they are filesystems in themselves. + */ +void +xfs_qm_statvfs( + xfs_inode_t *ip, + struct kstatfs *statp) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_dquot_t *dqp; + + if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { + xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); + xfs_qm_dqput(dqp); + } +} + +int +xfs_qm_newmount( + xfs_mount_t *mp, + uint *needquotamount, + uint *quotaflags) +{ + uint quotaondisk; + uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0; + + quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) && + (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT); + + if (quotaondisk) { + uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT; + pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT; + gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT; + } + + /* + * If the device itself is read-only, we can't allow + * the user to change the state of quota on the mount - + * this would generate a transaction on the ro device, + * which would lead to an I/O error and shutdown + */ + + if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) || + (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) || + (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) || + (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) || + (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) || + (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) && + xfs_dev_is_read_only(mp, "changing quota state")) { + xfs_warn(mp, "please mount with%s%s%s%s.", + (!quotaondisk ? "out quota" : ""), + (uquotaondisk ? " usrquota" : ""), + (pquotaondisk ? " prjquota" : ""), + (gquotaondisk ? " grpquota" : "")); + return XFS_ERROR(EPERM); + } + + if (XFS_IS_QUOTA_ON(mp) || quotaondisk) { + /* + * Call mount_quotas at this point only if we won't have to do + * a quotacheck. + */ + if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) { + /* + * If an error occurred, qm_mount_quotas code + * has already disabled quotas. So, just finish + * mounting, and get on with the boring life + * without disk quotas. + */ + xfs_qm_mount_quotas(mp); + } else { + /* + * Clear the quota flags, but remember them. This + * is so that the quota code doesn't get invoked + * before we're ready. This can happen when an + * inode goes inactive and wants to free blocks, + * or via xfs_log_mount_finish. + */ + *needquotamount = B_TRUE; + *quotaflags = mp->m_qflags; + mp->m_qflags = 0; + } + } + + return 0; +} + +void __init +xfs_qm_init(void) +{ + printk(KERN_INFO "SGI XFS Quota Management subsystem\n"); + mutex_init(&xfs_Gqm_lock); + xfs_qm_init_procfs(); +} + +void __exit +xfs_qm_exit(void) +{ + xfs_qm_cleanup_procfs(); + if (qm_dqzone) + kmem_zone_destroy(qm_dqzone); + if (qm_dqtrxzone) + kmem_zone_destroy(qm_dqtrxzone); +} diff --git a/fs/xfs/xfs_qm_stats.c b/fs/xfs/xfs_qm_stats.c new file mode 100644 index 000000000000..8671a0b32644 --- /dev/null +++ b/fs/xfs/xfs_qm_stats.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_qm.h" + +struct xqmstats xqmstats; + +static int xqm_proc_show(struct seq_file *m, void *v) +{ + /* maximum; incore; ratio free to inuse; freelist */ + seq_printf(m, "%d\t%d\t%d\t%u\n", + ndquot, + xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0, + xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0, + xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0); + return 0; +} + +static int xqm_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqm_proc_show, NULL); +} + +static const struct file_operations xqm_proc_fops = { + .owner = THIS_MODULE, + .open = xqm_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int xqmstat_proc_show(struct seq_file *m, void *v) +{ + /* quota performance statistics */ + seq_printf(m, "qm %u %u %u %u %u %u %u %u\n", + xqmstats.xs_qm_dqreclaims, + xqmstats.xs_qm_dqreclaim_misses, + xqmstats.xs_qm_dquot_dups, + xqmstats.xs_qm_dqcachemisses, + xqmstats.xs_qm_dqcachehits, + xqmstats.xs_qm_dqwants, + xqmstats.xs_qm_dqshake_reclaims, + xqmstats.xs_qm_dqinact_reclaims); + return 0; +} + +static int xqmstat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xqmstat_proc_show, NULL); +} + +static const struct file_operations xqmstat_proc_fops = { + .owner = THIS_MODULE, + .open = xqmstat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +void +xfs_qm_init_procfs(void) +{ + proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops); + proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops); +} + +void +xfs_qm_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/xqm", NULL); + remove_proc_entry("fs/xfs/xqmstat", NULL); +} diff --git a/fs/xfs/xfs_qm_stats.h b/fs/xfs/xfs_qm_stats.h new file mode 100644 index 000000000000..5b964fc0dc09 --- /dev/null +++ b/fs/xfs/xfs_qm_stats.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QM_STATS_H__ +#define __XFS_QM_STATS_H__ + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +/* + * XQM global statistics + */ +struct xqmstats { + __uint32_t xs_qm_dqreclaims; + __uint32_t xs_qm_dqreclaim_misses; + __uint32_t xs_qm_dquot_dups; + __uint32_t xs_qm_dqcachemisses; + __uint32_t xs_qm_dqcachehits; + __uint32_t xs_qm_dqwants; + __uint32_t xs_qm_dqshake_reclaims; + __uint32_t xs_qm_dqinact_reclaims; +}; + +extern struct xqmstats xqmstats; + +# define XQM_STATS_INC(count) ( (count)++ ) + +extern void xfs_qm_init_procfs(void); +extern void xfs_qm_cleanup_procfs(void); + +#else + +# define XQM_STATS_INC(count) do { } while (0) + +static inline void xfs_qm_init_procfs(void) { }; +static inline void xfs_qm_cleanup_procfs(void) { }; + +#endif + +#endif /* __XFS_QM_STATS_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c new file mode 100644 index 000000000000..609246f42e6c --- /dev/null +++ b/fs/xfs/xfs_qm_syscalls.c @@ -0,0 +1,906 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include + +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_qm.h" +#include "xfs_trace.h" + +STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); +STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, + uint); +STATIC uint xfs_qm_export_flags(uint); +STATIC uint xfs_qm_export_qtype_flags(uint); +STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *, + fs_disk_quota_t *); + + +/* + * Turn off quota accounting and/or enforcement for all udquots and/or + * gdquots. Called only at unmount time. + * + * This assumes that there are no dquots of this file system cached + * incore, and modifies the ondisk dquot directly. Therefore, for example, + * it is an error to call this twice, without purging the cache. + */ +int +xfs_qm_scall_quotaoff( + xfs_mount_t *mp, + uint flags) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + uint dqtype; + int error; + uint inactivate_flags; + xfs_qoff_logitem_t *qoffstart; + int nculprits; + + /* + * No file system can have quotas enabled on disk but not in core. + * Note that quota utilities (like quotaoff) _expect_ + * errno == EEXIST here. + */ + if ((mp->m_qflags & flags) == 0) + return XFS_ERROR(EEXIST); + error = 0; + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + + /* + * We don't want to deal with two quotaoffs messing up each other, + * so we're going to serialize it. quotaoff isn't exactly a performance + * critical thing. + * If quotaoff, then we must be dealing with the root filesystem. + */ + ASSERT(q); + mutex_lock(&q->qi_quotaofflock); + + /* + * If we're just turning off quota enforcement, change mp and go. + */ + if ((flags & XFS_ALL_QUOTA_ACCT) == 0) { + mp->m_qflags &= ~(flags); + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = mp->m_qflags; + spin_unlock(&mp->m_sb_lock); + mutex_unlock(&q->qi_quotaofflock); + + /* XXX what to do if error ? Revert back to old vals incore ? */ + error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS); + return (error); + } + + dqtype = 0; + inactivate_flags = 0; + /* + * If accounting is off, we must turn enforcement off, clear the + * quota 'CHKD' certificate to make it known that we have to + * do a quotacheck the next time this quota is turned on. + */ + if (flags & XFS_UQUOTA_ACCT) { + dqtype |= XFS_QMOPT_UQUOTA; + flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD); + inactivate_flags |= XFS_UQUOTA_ACTIVE; + } + if (flags & XFS_GQUOTA_ACCT) { + dqtype |= XFS_QMOPT_GQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_GQUOTA_ACTIVE; + } else if (flags & XFS_PQUOTA_ACCT) { + dqtype |= XFS_QMOPT_PQUOTA; + flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD); + inactivate_flags |= XFS_PQUOTA_ACTIVE; + } + + /* + * Nothing to do? Don't complain. This happens when we're just + * turning off quota enforcement. + */ + if ((mp->m_qflags & flags) == 0) + goto out_unlock; + + /* + * Write the LI_QUOTAOFF log record, and do SB changes atomically, + * and synchronously. If we fail to write, we should abort the + * operation as it cannot be recovered safely if we crash. + */ + error = xfs_qm_log_quotaoff(mp, &qoffstart, flags); + if (error) + goto out_unlock; + + /* + * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct + * to take care of the race between dqget and quotaoff. We don't take + * any special locks to reset these bits. All processes need to check + * these bits *after* taking inode lock(s) to see if the particular + * quota type is in the process of being turned off. If *ACTIVE, it is + * guaranteed that all dquot structures and all quotainode ptrs will all + * stay valid as long as that inode is kept locked. + * + * There is no turning back after this. + */ + mp->m_qflags &= ~inactivate_flags; + + /* + * Give back all the dquot reference(s) held by inodes. + * Here we go thru every single incore inode in this file system, and + * do a dqrele on the i_udquot/i_gdquot that it may have. + * Essentially, as long as somebody has an inode locked, this guarantees + * that quotas will not be turned off. This is handy because in a + * transaction once we lock the inode(s) and check for quotaon, we can + * depend on the quota inodes (and other things) being valid as long as + * we keep the lock(s). + */ + xfs_qm_dqrele_all_inodes(mp, flags); + + /* + * Next we make the changes in the quota flag in the mount struct. + * This isn't protected by a particular lock directly, because we + * don't want to take a mrlock every time we depend on quotas being on. + */ + mp->m_qflags &= ~(flags); + + /* + * Go through all the dquots of this file system and purge them, + * according to what was turned off. We may not be able to get rid + * of all dquots, because dquots can have temporary references that + * are not attached to inodes. eg. xfs_setattr, xfs_create. + * So, if we couldn't purge all the dquots from the filesystem, + * we can't get rid of the incore data structures. + */ + while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype))) + delay(10 * nculprits); + + /* + * Transactions that had started before ACTIVE state bit was cleared + * could have logged many dquots, so they'd have higher LSNs than + * the first QUOTAOFF log record does. If we happen to crash when + * the tail of the log has gone past the QUOTAOFF record, but + * before the last dquot modification, those dquots __will__ + * recover, and that's not good. + * + * So, we have QUOTAOFF start and end logitems; the start + * logitem won't get overwritten until the end logitem appears... + */ + error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags); + if (error) { + /* We're screwed now. Shutdown is the only option. */ + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + goto out_unlock; + } + + /* + * If quotas is completely disabled, close shop. + */ + if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) || + ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) { + mutex_unlock(&q->qi_quotaofflock); + xfs_qm_destroy_quotainfo(mp); + return (0); + } + + /* + * Release our quotainode references if we don't need them anymore. + */ + if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) { + IRELE(q->qi_uquotaip); + q->qi_uquotaip = NULL; + } + if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) { + IRELE(q->qi_gquotaip); + q->qi_gquotaip = NULL; + } + +out_unlock: + mutex_unlock(&q->qi_quotaofflock); + return error; +} + +STATIC int +xfs_qm_scall_trunc_qfile( + struct xfs_mount *mp, + xfs_ino_t ino) +{ + struct xfs_inode *ip; + struct xfs_trans *tp; + int error; + + if (ino == NULLFSINO) + return 0; + + error = xfs_iget(mp, NULL, ino, 0, 0, &ip); + if (error) + return error; + + xfs_ilock(ip, XFS_IOLOCK_EXCL); + + tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE); + error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, + XFS_TRANS_PERM_LOG_RES, + XFS_ITRUNCATE_LOG_COUNT); + if (error) { + xfs_trans_cancel(tp, 0); + xfs_iunlock(ip, XFS_IOLOCK_EXCL); + goto out_put; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip); + + error = xfs_itruncate_data(&tp, ip, 0); + if (error) { + xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | + XFS_TRANS_ABORT); + goto out_unlock; + } + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); + +out_unlock: + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); +out_put: + IRELE(ip); + return error; +} + +int +xfs_qm_scall_trunc_qfiles( + xfs_mount_t *mp, + uint flags) +{ + int error = 0, error2 = 0; + + if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { + xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", + __func__, flags, mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + if (flags & XFS_DQ_USER) + error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); + if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ)) + error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); + + return error ? error : error2; +} + +/* + * Switch on (a given) quota enforcement for a filesystem. This takes + * effect immediately. + * (Switching on quota accounting must be done at mount time.) + */ +int +xfs_qm_scall_quotaon( + xfs_mount_t *mp, + uint flags) +{ + int error; + uint qf; + __int64_t sbflags; + + flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD); + /* + * Switching on quota accounting must be done at mount time. + */ + flags &= ~(XFS_ALL_QUOTA_ACCT); + + sbflags = 0; + + if (flags == 0) { + xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", + __func__, mp->m_qflags); + return XFS_ERROR(EINVAL); + } + + /* No fs can turn on quotas with a delayed effect */ + ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0); + + /* + * Can't enforce without accounting. We check the superblock + * qflags here instead of m_qflags because rootfs can have + * quota acct on ondisk without m_qflags' knowing. + */ + if (((flags & XFS_UQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 && + (flags & XFS_UQUOTA_ENFD)) + || + ((flags & XFS_PQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 && + (flags & XFS_GQUOTA_ACCT) == 0 && + (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 && + (flags & XFS_OQUOTA_ENFD))) { + xfs_debug(mp, + "%s: Can't enforce without acct, flags=%x sbflags=%x\n", + __func__, flags, mp->m_sb.sb_qflags); + return XFS_ERROR(EINVAL); + } + /* + * If everything's up to-date incore, then don't waste time. + */ + if ((mp->m_qflags & flags) == flags) + return XFS_ERROR(EEXIST); + + /* + * Change sb_qflags on disk but not incore mp->qflags + * if this is the root filesystem. + */ + spin_lock(&mp->m_sb_lock); + qf = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = qf | flags; + spin_unlock(&mp->m_sb_lock); + + /* + * There's nothing to change if it's the same. + */ + if ((qf & flags) == flags && sbflags == 0) + return XFS_ERROR(EEXIST); + sbflags |= XFS_SB_QFLAGS; + + if ((error = xfs_qm_write_sb_changes(mp, sbflags))) + return (error); + /* + * If we aren't trying to switch on quota enforcement, we are done. + */ + if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) != + (mp->m_qflags & XFS_UQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) != + (mp->m_qflags & XFS_PQUOTA_ACCT)) || + ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) != + (mp->m_qflags & XFS_GQUOTA_ACCT)) || + (flags & XFS_ALL_QUOTA_ENFD) == 0) + return (0); + + if (! XFS_IS_QUOTA_RUNNING(mp)) + return XFS_ERROR(ESRCH); + + /* + * Switch on quota enforcement in core. + */ + mutex_lock(&mp->m_quotainfo->qi_quotaofflock); + mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD); + mutex_unlock(&mp->m_quotainfo->qi_quotaofflock); + + return (0); +} + + +/* + * Return quota status information, such as uquota-off, enforcements, etc. + */ +int +xfs_qm_scall_getqstat( + struct xfs_mount *mp, + struct fs_quota_stat *out) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + struct xfs_inode *uip, *gip; + boolean_t tempuqip, tempgqip; + + uip = gip = NULL; + tempuqip = tempgqip = B_FALSE; + memset(out, 0, sizeof(fs_quota_stat_t)); + + out->qs_version = FS_QSTAT_VERSION; + if (!xfs_sb_version_hasquota(&mp->m_sb)) { + out->qs_uquota.qfs_ino = NULLFSINO; + out->qs_gquota.qfs_ino = NULLFSINO; + return (0); + } + out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags & + (XFS_ALL_QUOTA_ACCT| + XFS_ALL_QUOTA_ENFD)); + out->qs_pad = 0; + out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino; + out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino; + + if (q) { + uip = q->qi_uquotaip; + gip = q->qi_gquotaip; + } + if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino, + 0, 0, &uip) == 0) + tempuqip = B_TRUE; + } + if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) { + if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino, + 0, 0, &gip) == 0) + tempgqip = B_TRUE; + } + if (uip) { + out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks; + out->qs_uquota.qfs_nextents = uip->i_d.di_nextents; + if (tempuqip) + IRELE(uip); + } + if (gip) { + out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks; + out->qs_gquota.qfs_nextents = gip->i_d.di_nextents; + if (tempgqip) + IRELE(gip); + } + if (q) { + out->qs_incoredqs = q->qi_dquots; + out->qs_btimelimit = q->qi_btimelimit; + out->qs_itimelimit = q->qi_itimelimit; + out->qs_rtbtimelimit = q->qi_rtbtimelimit; + out->qs_bwarnlimit = q->qi_bwarnlimit; + out->qs_iwarnlimit = q->qi_iwarnlimit; + } + return 0; +} + +#define XFS_DQ_MASK \ + (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK) + +/* + * Adjust quota limits, and start/stop timers accordingly. + */ +int +xfs_qm_scall_setqlim( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *newlim) +{ + struct xfs_quotainfo *q = mp->m_quotainfo; + xfs_disk_dquot_t *ddq; + xfs_dquot_t *dqp; + xfs_trans_t *tp; + int error; + xfs_qcnt_t hard, soft; + + if (newlim->d_fieldmask & ~XFS_DQ_MASK) + return EINVAL; + if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0) + return 0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + /* + * We don't want to race with a quotaoff so take the quotaoff lock. + * (We don't hold an inode lock, so there's nothing else to stop + * a quotaoff from happening). (XXXThis doesn't currently happen + * because we take the vfslock before calling xfs_qm_sysent). + */ + mutex_lock(&q->qi_quotaofflock); + + /* + * Get the dquot (locked), and join it to the transaction. + * Allocate the dquot if this doesn't exist. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) { + xfs_trans_cancel(tp, XFS_TRANS_ABORT); + ASSERT(error != ENOENT); + goto out_unlock; + } + xfs_trans_dqjoin(tp, dqp); + ddq = &dqp->q_core; + + /* + * Make sure that hardlimits are >= soft limits before changing. + */ + hard = (newlim->d_fieldmask & FS_DQ_BHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) : + be64_to_cpu(ddq->d_blk_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) : + be64_to_cpu(ddq->d_blk_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_blk_hardlimit = cpu_to_be64(hard); + ddq->d_blk_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_bhardlimit = hard; + q->qi_bsoftlimit = soft; + } + } else { + xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); + } + hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : + be64_to_cpu(ddq->d_rtb_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ? + (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) : + be64_to_cpu(ddq->d_rtb_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_rtb_hardlimit = cpu_to_be64(hard); + ddq->d_rtb_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_rtbhardlimit = hard; + q->qi_rtbsoftlimit = soft; + } + } else { + xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); + } + + hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? + (xfs_qcnt_t) newlim->d_ino_hardlimit : + be64_to_cpu(ddq->d_ino_hardlimit); + soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ? + (xfs_qcnt_t) newlim->d_ino_softlimit : + be64_to_cpu(ddq->d_ino_softlimit); + if (hard == 0 || hard >= soft) { + ddq->d_ino_hardlimit = cpu_to_be64(hard); + ddq->d_ino_softlimit = cpu_to_be64(soft); + if (id == 0) { + q->qi_ihardlimit = hard; + q->qi_isoftlimit = soft; + } + } else { + xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); + } + + /* + * Update warnings counter(s) if requested + */ + if (newlim->d_fieldmask & FS_DQ_BWARNS) + ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns); + if (newlim->d_fieldmask & FS_DQ_IWARNS) + ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns); + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns); + + if (id == 0) { + /* + * Timelimits for the super user set the relative time + * the other users can be over quota for this file system. + * If it is zero a default is used. Ditto for the default + * soft and hard limit values (already done, above), and + * for warnings. + */ + if (newlim->d_fieldmask & FS_DQ_BTIMER) { + q->qi_btimelimit = newlim->d_btimer; + ddq->d_btimer = cpu_to_be32(newlim->d_btimer); + } + if (newlim->d_fieldmask & FS_DQ_ITIMER) { + q->qi_itimelimit = newlim->d_itimer; + ddq->d_itimer = cpu_to_be32(newlim->d_itimer); + } + if (newlim->d_fieldmask & FS_DQ_RTBTIMER) { + q->qi_rtbtimelimit = newlim->d_rtbtimer; + ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer); + } + if (newlim->d_fieldmask & FS_DQ_BWARNS) + q->qi_bwarnlimit = newlim->d_bwarns; + if (newlim->d_fieldmask & FS_DQ_IWARNS) + q->qi_iwarnlimit = newlim->d_iwarns; + if (newlim->d_fieldmask & FS_DQ_RTBWARNS) + q->qi_rtbwarnlimit = newlim->d_rtbwarns; + } else { + /* + * If the user is now over quota, start the timelimit. + * The user will not be 'warned'. + * Note that we keep the timers ticking, whether enforcement + * is on or off. We don't really want to bother with iterating + * over all ondisk dquots and turning the timers on/off. + */ + xfs_qm_adjust_dqtimers(mp, ddq); + } + dqp->dq_flags |= XFS_DQ_DIRTY; + xfs_trans_log_dquot(tp, dqp); + + error = xfs_trans_commit(tp, 0); + xfs_qm_dqrele(dqp); + + out_unlock: + mutex_unlock(&q->qi_quotaofflock); + return error; +} + +int +xfs_qm_scall_getquota( + xfs_mount_t *mp, + xfs_dqid_t id, + uint type, + fs_disk_quota_t *out) +{ + xfs_dquot_t *dqp; + int error; + + /* + * Try to get the dquot. We don't want it allocated on disk, so + * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't + * exist, we'll get ENOENT back. + */ + if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) { + return (error); + } + + /* + * If everything's NULL, this dquot doesn't quite exist as far as + * our utility programs are concerned. + */ + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + return XFS_ERROR(ENOENT); + } + /* + * Convert the disk dquot to the exportable format + */ + xfs_qm_export_dquot(mp, &dqp->q_core, out); + xfs_qm_dqput(dqp); + return (error ? XFS_ERROR(EFAULT) : 0); +} + + +STATIC int +xfs_qm_log_quotaoff_end( + xfs_mount_t *mp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END); + + if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2, + 0, 0, XFS_DEFAULT_LOG_COUNT))) { + xfs_trans_cancel(tp, 0); + return (error); + } + + qoffi = xfs_trans_get_qoff_item(tp, startqoff, + flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + return (error); +} + + +STATIC int +xfs_qm_log_quotaoff( + xfs_mount_t *mp, + xfs_qoff_logitem_t **qoffstartp, + uint flags) +{ + xfs_trans_t *tp; + int error; + xfs_qoff_logitem_t *qoffi=NULL; + uint oldsbqflag=0; + + tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF); + if ((error = xfs_trans_reserve(tp, 0, + sizeof(xfs_qoff_logitem_t) * 2 + + mp->m_sb.sb_sectsize + 128, + 0, + 0, + XFS_DEFAULT_LOG_COUNT))) { + goto error0; + } + + qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT); + xfs_trans_log_quotaoff_item(tp, qoffi); + + spin_lock(&mp->m_sb_lock); + oldsbqflag = mp->m_sb.sb_qflags; + mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL; + spin_unlock(&mp->m_sb_lock); + + xfs_mod_sb(tp, XFS_SB_QFLAGS); + + /* + * We have to make sure that the transaction is secure on disk before we + * return and actually stop quota accounting. So, make it synchronous. + * We don't care about quotoff's performance. + */ + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); + +error0: + if (error) { + xfs_trans_cancel(tp, 0); + /* + * No one else is modifying sb_qflags, so this is OK. + * We still hold the quotaofflock. + */ + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags = oldsbqflag; + spin_unlock(&mp->m_sb_lock); + } + *qoffstartp = qoffi; + return (error); +} + + +/* + * Translate an internal style on-disk-dquot to the exportable format. + * The main differences are that the counters/limits are all in Basic + * Blocks (BBs) instead of the internal FSBs, and all on-disk data has + * to be converted to the native endianness. + */ +STATIC void +xfs_qm_export_dquot( + xfs_mount_t *mp, + xfs_disk_dquot_t *src, + struct fs_disk_quota *dst) +{ + memset(dst, 0, sizeof(*dst)); + dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */ + dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags); + dst->d_id = be32_to_cpu(src->d_id); + dst->d_blk_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit)); + dst->d_blk_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit)); + dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit); + dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit); + dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount)); + dst->d_icount = be64_to_cpu(src->d_icount); + dst->d_btimer = be32_to_cpu(src->d_btimer); + dst->d_itimer = be32_to_cpu(src->d_itimer); + dst->d_iwarns = be16_to_cpu(src->d_iwarns); + dst->d_bwarns = be16_to_cpu(src->d_bwarns); + dst->d_rtb_hardlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit)); + dst->d_rtb_softlimit = + XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit)); + dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount)); + dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer); + dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns); + + /* + * Internally, we don't reset all the timers when quota enforcement + * gets turned off. No need to confuse the user level code, + * so return zeroes in that case. + */ + if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) || + (!XFS_IS_OQUOTA_ENFORCED(mp) && + (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) { + dst->d_btimer = 0; + dst->d_itimer = 0; + dst->d_rtbtimer = 0; + } + +#ifdef DEBUG + if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) || + (XFS_IS_OQUOTA_ENFORCED(mp) && + (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) && + dst->d_id != 0) { + if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) && + (dst->d_blk_softlimit > 0)) { + ASSERT(dst->d_btimer != 0); + } + if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) && + (dst->d_ino_softlimit > 0)) { + ASSERT(dst->d_itimer != 0); + } + } +#endif +} + +STATIC uint +xfs_qm_export_qtype_flags( + uint flags) +{ + /* + * Can't be more than one, or none. + */ + ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) != + (FS_PROJ_QUOTA | FS_USER_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) != + (FS_PROJ_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) != + (FS_USER_QUOTA | FS_GROUP_QUOTA)); + ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0); + + return (flags & XFS_DQ_USER) ? + FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ? + FS_PROJ_QUOTA : FS_GROUP_QUOTA; +} + +STATIC uint +xfs_qm_export_flags( + uint flags) +{ + uint uflags; + + uflags = 0; + if (flags & XFS_UQUOTA_ACCT) + uflags |= FS_QUOTA_UDQ_ACCT; + if (flags & XFS_PQUOTA_ACCT) + uflags |= FS_QUOTA_PDQ_ACCT; + if (flags & XFS_GQUOTA_ACCT) + uflags |= FS_QUOTA_GDQ_ACCT; + if (flags & XFS_UQUOTA_ENFD) + uflags |= FS_QUOTA_UDQ_ENFD; + if (flags & (XFS_OQUOTA_ENFD)) { + uflags |= (flags & XFS_GQUOTA_ACCT) ? + FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD; + } + return (uflags); +} + + +STATIC int +xfs_dqrele_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + /* skip quota inodes */ + if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || + ip == ip->i_mount->m_quotainfo->qi_gquotaip) { + ASSERT(ip->i_udquot == NULL); + ASSERT(ip->i_gdquot == NULL); + return 0; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { + xfs_qm_dqrele(ip->i_udquot); + ip->i_udquot = NULL; + } + if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) { + xfs_qm_dqrele(ip->i_gdquot); + ip->i_gdquot = NULL; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return 0; +} + + +/* + * Go thru all the inodes in the file system, releasing their dquots. + * + * Note that the mount structure gets modified to indicate that quotas are off + * AFTER this, in the case of quotaoff. + */ +void +xfs_qm_dqrele_all_inodes( + struct xfs_mount *mp, + uint flags) +{ + ASSERT(mp->m_quotainfo); + xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); +} diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h new file mode 100644 index 000000000000..94a3d927d716 --- /dev/null +++ b/fs/xfs/xfs_quota_priv.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2000-2003 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_QUOTA_PRIV_H__ +#define __XFS_QUOTA_PRIV_H__ + +/* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE 10 + +/* + * Hash into a bucket in the dquot hash table, based on . + */ +#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \ + (__psunsigned_t)(id)) & \ + (xfs_Gqm->qm_dqhashmask - 1)) +#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \ + (xfs_Gqm->qm_usr_dqhtable + \ + XFS_DQ_HASHVAL(mp, id)) : \ + (xfs_Gqm->qm_grp_dqhtable + \ + XFS_DQ_HASHVAL(mp, id))) +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ + !dqp->q_core.d_blk_hardlimit && \ + !dqp->q_core.d_blk_softlimit && \ + !dqp->q_core.d_rtb_hardlimit && \ + !dqp->q_core.d_rtb_softlimit && \ + !dqp->q_core.d_ino_hardlimit && \ + !dqp->q_core.d_ino_softlimit && \ + !dqp->q_core.d_bcount && \ + !dqp->q_core.d_rtbcount && \ + !dqp->q_core.d_icount) + +#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ + (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ + (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) + +#endif /* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c new file mode 100644 index 000000000000..7e76f537abb7 --- /dev/null +++ b/fs/xfs/xfs_quotaops.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2008, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_sb.h" +#include "xfs_inum.h" +#include "xfs_log.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_qm.h" +#include + + +STATIC int +xfs_quota_type(int type) +{ + switch (type) { + case USRQUOTA: + return XFS_DQ_USER; + case GRPQUOTA: + return XFS_DQ_GROUP; + default: + return XFS_DQ_PROJ; + } +} + +STATIC int +xfs_fs_get_xstate( + struct super_block *sb, + struct fs_quota_stat *fqs) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + return -xfs_qm_scall_getqstat(mp, fqs); +} + +STATIC int +xfs_fs_set_xstate( + struct super_block *sb, + unsigned int uflags, + int op) +{ + struct xfs_mount *mp = XFS_M(sb); + unsigned int flags = 0; + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + + if (uflags & FS_QUOTA_UDQ_ACCT) + flags |= XFS_UQUOTA_ACCT; + if (uflags & FS_QUOTA_PDQ_ACCT) + flags |= XFS_PQUOTA_ACCT; + if (uflags & FS_QUOTA_GDQ_ACCT) + flags |= XFS_GQUOTA_ACCT; + if (uflags & FS_QUOTA_UDQ_ENFD) + flags |= XFS_UQUOTA_ENFD; + if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD)) + flags |= XFS_OQUOTA_ENFD; + + switch (op) { + case Q_XQUOTAON: + return -xfs_qm_scall_quotaon(mp, flags); + case Q_XQUOTAOFF: + if (!XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + return -xfs_qm_scall_quotaoff(mp, flags); + case Q_XQUOTARM: + if (XFS_IS_QUOTA_ON(mp)) + return -EINVAL; + return -xfs_qm_scall_trunc_qfiles(mp, flags); + } + + return -EINVAL; +} + +STATIC int +xfs_fs_get_dqblk( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq); +} + +STATIC int +xfs_fs_set_dqblk( + struct super_block *sb, + int type, + qid_t id, + struct fs_disk_quota *fdq) +{ + struct xfs_mount *mp = XFS_M(sb); + + if (sb->s_flags & MS_RDONLY) + return -EROFS; + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq); +} + +const struct quotactl_ops xfs_quotactl_operations = { + .get_xstate = xfs_fs_get_xstate, + .set_xstate = xfs_fs_set_xstate, + .get_dqblk = xfs_fs_get_dqblk, + .set_dqblk = xfs_fs_set_dqblk, +}; diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c new file mode 100644 index 000000000000..76fdc5861932 --- /dev/null +++ b/fs/xfs/xfs_stats.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include + +DEFINE_PER_CPU(struct xfsstats, xfsstats); + +static int xfs_stat_proc_show(struct seq_file *m, void *v) +{ + int c, i, j, val; + __uint64_t xs_xstrat_bytes = 0; + __uint64_t xs_write_bytes = 0; + __uint64_t xs_read_bytes = 0; + + static const struct xstats_entry { + char *desc; + int endpoint; + } xstats[] = { + { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC }, + { "abt", XFSSTAT_END_ALLOC_BTREE }, + { "blk_map", XFSSTAT_END_BLOCK_MAPPING }, + { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE }, + { "dir", XFSSTAT_END_DIRECTORY_OPS }, + { "trans", XFSSTAT_END_TRANSACTIONS }, + { "ig", XFSSTAT_END_INODE_OPS }, + { "log", XFSSTAT_END_LOG_OPS }, + { "push_ail", XFSSTAT_END_TAIL_PUSHING }, + { "xstrat", XFSSTAT_END_WRITE_CONVERT }, + { "rw", XFSSTAT_END_READ_WRITE_OPS }, + { "attr", XFSSTAT_END_ATTRIBUTE_OPS }, + { "icluster", XFSSTAT_END_INODE_CLUSTER }, + { "vnodes", XFSSTAT_END_VNODE_OPS }, + { "buf", XFSSTAT_END_BUF }, + { "abtb2", XFSSTAT_END_ABTB_V2 }, + { "abtc2", XFSSTAT_END_ABTC_V2 }, + { "bmbt2", XFSSTAT_END_BMBT_V2 }, + { "ibt2", XFSSTAT_END_IBT_V2 }, + }; + + /* Loop over all stats groups */ + for (i=j = 0; i < ARRAY_SIZE(xstats); i++) { + seq_printf(m, "%s", xstats[i].desc); + /* inner loop does each group */ + while (j < xstats[i].endpoint) { + val = 0; + /* sum over all cpus */ + for_each_possible_cpu(c) + val += *(((__u32*)&per_cpu(xfsstats, c) + j)); + seq_printf(m, " %u", val); + j++; + } + seq_putc(m, '\n'); + } + /* extra precision counters */ + for_each_possible_cpu(i) { + xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; + xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; + xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; + } + + seq_printf(m, "xpc %Lu %Lu %Lu\n", + xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); + seq_printf(m, "debug %u\n", +#if defined(DEBUG) + 1); +#else + 0); +#endif + return 0; +} + +static int xfs_stat_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, xfs_stat_proc_show, NULL); +} + +static const struct file_operations xfs_stat_proc_fops = { + .owner = THIS_MODULE, + .open = xfs_stat_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +int +xfs_init_procfs(void) +{ + if (!proc_mkdir("fs/xfs", NULL)) + goto out; + + if (!proc_create("fs/xfs/stat", 0, NULL, + &xfs_stat_proc_fops)) + goto out_remove_entry; + return 0; + + out_remove_entry: + remove_proc_entry("fs/xfs", NULL); + out: + return -ENOMEM; +} + +void +xfs_cleanup_procfs(void) +{ + remove_proc_entry("fs/xfs/stat", NULL); + remove_proc_entry("fs/xfs", NULL); +} diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h new file mode 100644 index 000000000000..736854b1ca1a --- /dev/null +++ b/fs/xfs/xfs_stats.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2000,2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_STATS_H__ +#define __XFS_STATS_H__ + + +#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF) + +#include + +/* + * XFS global statistics + */ +struct xfsstats { +# define XFSSTAT_END_EXTENT_ALLOC 4 + __uint32_t xs_allocx; + __uint32_t xs_allocb; + __uint32_t xs_freex; + __uint32_t xs_freeb; +# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4) + __uint32_t xs_abt_lookup; + __uint32_t xs_abt_compare; + __uint32_t xs_abt_insrec; + __uint32_t xs_abt_delrec; +# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7) + __uint32_t xs_blk_mapr; + __uint32_t xs_blk_mapw; + __uint32_t xs_blk_unmap; + __uint32_t xs_add_exlist; + __uint32_t xs_del_exlist; + __uint32_t xs_look_exlist; + __uint32_t xs_cmp_exlist; +# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4) + __uint32_t xs_bmbt_lookup; + __uint32_t xs_bmbt_compare; + __uint32_t xs_bmbt_insrec; + __uint32_t xs_bmbt_delrec; +# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4) + __uint32_t xs_dir_lookup; + __uint32_t xs_dir_create; + __uint32_t xs_dir_remove; + __uint32_t xs_dir_getdents; +# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3) + __uint32_t xs_trans_sync; + __uint32_t xs_trans_async; + __uint32_t xs_trans_empty; +# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7) + __uint32_t xs_ig_attempts; + __uint32_t xs_ig_found; + __uint32_t xs_ig_frecycle; + __uint32_t xs_ig_missed; + __uint32_t xs_ig_dup; + __uint32_t xs_ig_reclaims; + __uint32_t xs_ig_attrchg; +# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5) + __uint32_t xs_log_writes; + __uint32_t xs_log_blocks; + __uint32_t xs_log_noiclogs; + __uint32_t xs_log_force; + __uint32_t xs_log_force_sleep; +# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10) + __uint32_t xs_try_logspace; + __uint32_t xs_sleep_logspace; + __uint32_t xs_push_ail; + __uint32_t xs_push_ail_success; + __uint32_t xs_push_ail_pushbuf; + __uint32_t xs_push_ail_pinned; + __uint32_t xs_push_ail_locked; + __uint32_t xs_push_ail_flushing; + __uint32_t xs_push_ail_restarts; + __uint32_t xs_push_ail_flush; +# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2) + __uint32_t xs_xstrat_quick; + __uint32_t xs_xstrat_split; +# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2) + __uint32_t xs_write_calls; + __uint32_t xs_read_calls; +# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4) + __uint32_t xs_attr_get; + __uint32_t xs_attr_set; + __uint32_t xs_attr_remove; + __uint32_t xs_attr_list; +# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3) + __uint32_t xs_iflush_count; + __uint32_t xs_icluster_flushcnt; + __uint32_t xs_icluster_flushinode; +# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8) + __uint32_t vn_active; /* # vnodes not on free lists */ + __uint32_t vn_alloc; /* # times vn_alloc called */ + __uint32_t vn_get; /* # times vn_get called */ + __uint32_t vn_hold; /* # times vn_hold called */ + __uint32_t vn_rele; /* # times vn_rele called */ + __uint32_t vn_reclaim; /* # times vn_reclaim called */ + __uint32_t vn_remove; /* # times vn_remove called */ + __uint32_t vn_free; /* # times vn_free called */ +#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9) + __uint32_t xb_get; + __uint32_t xb_create; + __uint32_t xb_get_locked; + __uint32_t xb_get_locked_waited; + __uint32_t xb_busy_locked; + __uint32_t xb_miss_locked; + __uint32_t xb_page_retries; + __uint32_t xb_page_found; + __uint32_t xb_get_read; +/* Version 2 btree counters */ +#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) + __uint32_t xs_abtb_2_lookup; + __uint32_t xs_abtb_2_compare; + __uint32_t xs_abtb_2_insrec; + __uint32_t xs_abtb_2_delrec; + __uint32_t xs_abtb_2_newroot; + __uint32_t xs_abtb_2_killroot; + __uint32_t xs_abtb_2_increment; + __uint32_t xs_abtb_2_decrement; + __uint32_t xs_abtb_2_lshift; + __uint32_t xs_abtb_2_rshift; + __uint32_t xs_abtb_2_split; + __uint32_t xs_abtb_2_join; + __uint32_t xs_abtb_2_alloc; + __uint32_t xs_abtb_2_free; + __uint32_t xs_abtb_2_moves; +#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) + __uint32_t xs_abtc_2_lookup; + __uint32_t xs_abtc_2_compare; + __uint32_t xs_abtc_2_insrec; + __uint32_t xs_abtc_2_delrec; + __uint32_t xs_abtc_2_newroot; + __uint32_t xs_abtc_2_killroot; + __uint32_t xs_abtc_2_increment; + __uint32_t xs_abtc_2_decrement; + __uint32_t xs_abtc_2_lshift; + __uint32_t xs_abtc_2_rshift; + __uint32_t xs_abtc_2_split; + __uint32_t xs_abtc_2_join; + __uint32_t xs_abtc_2_alloc; + __uint32_t xs_abtc_2_free; + __uint32_t xs_abtc_2_moves; +#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) + __uint32_t xs_bmbt_2_lookup; + __uint32_t xs_bmbt_2_compare; + __uint32_t xs_bmbt_2_insrec; + __uint32_t xs_bmbt_2_delrec; + __uint32_t xs_bmbt_2_newroot; + __uint32_t xs_bmbt_2_killroot; + __uint32_t xs_bmbt_2_increment; + __uint32_t xs_bmbt_2_decrement; + __uint32_t xs_bmbt_2_lshift; + __uint32_t xs_bmbt_2_rshift; + __uint32_t xs_bmbt_2_split; + __uint32_t xs_bmbt_2_join; + __uint32_t xs_bmbt_2_alloc; + __uint32_t xs_bmbt_2_free; + __uint32_t xs_bmbt_2_moves; +#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) + __uint32_t xs_ibt_2_lookup; + __uint32_t xs_ibt_2_compare; + __uint32_t xs_ibt_2_insrec; + __uint32_t xs_ibt_2_delrec; + __uint32_t xs_ibt_2_newroot; + __uint32_t xs_ibt_2_killroot; + __uint32_t xs_ibt_2_increment; + __uint32_t xs_ibt_2_decrement; + __uint32_t xs_ibt_2_lshift; + __uint32_t xs_ibt_2_rshift; + __uint32_t xs_ibt_2_split; + __uint32_t xs_ibt_2_join; + __uint32_t xs_ibt_2_alloc; + __uint32_t xs_ibt_2_free; + __uint32_t xs_ibt_2_moves; +/* Extra precision counters */ + __uint64_t xs_xstrat_bytes; + __uint64_t xs_write_bytes; + __uint64_t xs_read_bytes; +}; + +DECLARE_PER_CPU(struct xfsstats, xfsstats); + +/* + * We don't disable preempt, not too worried about poking the + * wrong CPU's stat for now (also aggregated before reporting). + */ +#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) +#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) +#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) + +extern int xfs_init_procfs(void); +extern void xfs_cleanup_procfs(void); + + +#else /* !CONFIG_PROC_FS */ + +# define XFS_STATS_INC(count) +# define XFS_STATS_DEC(count) +# define XFS_STATS_ADD(count, inc) + +static inline int xfs_init_procfs(void) +{ + return 0; +} + +static inline void xfs_cleanup_procfs(void) +{ +} + +#endif /* !CONFIG_PROC_FS */ + +#endif /* __XFS_STATS_H__ */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c new file mode 100644 index 000000000000..9a72dda58bd0 --- /dev/null +++ b/fs/xfs/xfs_super.c @@ -0,0 +1,1773 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_ialloc.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_itable.h" +#include "xfs_fsops.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_utils.h" +#include "xfs_vnodeops.h" +#include "xfs_log_priv.h" +#include "xfs_trans_priv.h" +#include "xfs_filestream.h" +#include "xfs_da_btree.h" +#include "xfs_extfree_item.h" +#include "xfs_mru_cache.h" +#include "xfs_inode_item.h" +#include "xfs_sync.h" +#include "xfs_trace.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct super_operations xfs_super_operations; +static kmem_zone_t *xfs_ioend_zone; +mempool_t *xfs_ioend_pool; + +#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ +#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ +#define MNTOPT_LOGDEV "logdev" /* log device */ +#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */ +#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */ +#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */ +#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */ +#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */ +#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */ +#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */ +#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */ +#define MNTOPT_MTPT "mtpt" /* filesystem mount point */ +#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */ +#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */ +#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */ +#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */ +#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */ +#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */ +#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and + * unwritten extent conversion */ +#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */ +#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */ +#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */ +#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */ +#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */ +#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes + * in stat(). */ +#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */ +#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */ +#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */ +#define MNTOPT_QUOTA "quota" /* disk quotas (user) */ +#define MNTOPT_NOQUOTA "noquota" /* no quotas */ +#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */ +#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */ +#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */ +#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */ +#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */ +#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */ +#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */ +#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */ +#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */ +#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */ +#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */ +#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */ +#define MNTOPT_DISCARD "discard" /* Discard unused blocks */ +#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */ + +/* + * Table driven mount option parser. + * + * Currently only used for remount, but it will be used for mount + * in the future, too. + */ +enum { + Opt_barrier, Opt_nobarrier, Opt_err +}; + +static const match_table_t tokens = { + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_err, NULL} +}; + + +STATIC unsigned long +suffix_strtoul(char *s, char **endp, unsigned int base) +{ + int last, shift_left_factor = 0; + char *value = s; + + last = strlen(value) - 1; + if (value[last] == 'K' || value[last] == 'k') { + shift_left_factor = 10; + value[last] = '\0'; + } + if (value[last] == 'M' || value[last] == 'm') { + shift_left_factor = 20; + value[last] = '\0'; + } + if (value[last] == 'G' || value[last] == 'g') { + shift_left_factor = 30; + value[last] = '\0'; + } + + return simple_strtoul((const char *)s, endp, base) << shift_left_factor; +} + +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + * + * Note that this function leaks the various device name allocations on + * failure. The caller takes care of them. + */ +STATIC int +xfs_parseargs( + struct xfs_mount *mp, + char *options) +{ + struct super_block *sb = mp->m_super; + char *this_char, *value, *eov; + int dsunit = 0; + int dswidth = 0; + int iosize = 0; + __uint8_t iosizelog = 0; + + /* + * set up the mount name first so all the errors will refer to the + * correct device. + */ + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (sb->s_flags & MS_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (sb->s_flags & MS_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + /* + * Set some default flags that could be cleared by the mount option + * parsing. + */ + mp->m_flags |= XFS_MOUNT_BARRIER; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + mp->m_flags |= XFS_MOUNT_DELAYLOG; + + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; + + if (!options) + goto done; + + while ((this_char = strsep(&options, ",")) != NULL) { + if (!*this_char) + continue; + if ((value = strchr(this_char, '=')) != NULL) + *value++ = 0; + + if (!strcmp(this_char, MNTOPT_LOGBUFS)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logbufs = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logbsize = suffix_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_logname) + return ENOMEM; + } else if (!strcmp(this_char, MNTOPT_MTPT)) { + xfs_warn(mp, "%s option not allowed on this system", + this_char); + return EINVAL; + } else if (!strcmp(this_char, MNTOPT_RTDEV)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_rtname) + return ENOMEM; + } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + iosize = simple_strtoul(value, &eov, 10); + iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + iosize = suffix_strtoul(value, &eov, 10); + iosizelog = ffs(iosize) - 1; + } else if (!strcmp(this_char, MNTOPT_GRPID) || + !strcmp(this_char, MNTOPT_BSDGROUPS)) { + mp->m_flags |= XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_NOGRPID) || + !strcmp(this_char, MNTOPT_SYSVGROUPS)) { + mp->m_flags &= ~XFS_MOUNT_GRPID; + } else if (!strcmp(this_char, MNTOPT_WSYNC)) { + mp->m_flags |= XFS_MOUNT_WSYNC; + } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { + mp->m_flags |= XFS_MOUNT_NORECOVERY; + } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { + mp->m_flags |= XFS_MOUNT_NOALIGN; + } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { + mp->m_flags |= XFS_MOUNT_SWALLOC; + } else if (!strcmp(this_char, MNTOPT_SUNIT)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + dsunit = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_SWIDTH)) { + if (!value || !*value) { + xfs_warn(mp, "%s option requires an argument", + this_char); + return EINVAL; + } + dswidth = simple_strtoul(value, &eov, 10); + } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; +#if !XFS_BIG_INUMS + xfs_warn(mp, "%s option not allowed on this system", + this_char); + return EINVAL; +#endif + } else if (!strcmp(this_char, MNTOPT_NOUUID)) { + mp->m_flags |= XFS_MOUNT_NOUUID; + } else if (!strcmp(this_char, MNTOPT_BARRIER)) { + mp->m_flags |= XFS_MOUNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { + mp->m_flags &= ~XFS_MOUNT_BARRIER; + } else if (!strcmp(this_char, MNTOPT_IKEEP)) { + mp->m_flags |= XFS_MOUNT_IKEEP; + } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { + mp->m_flags &= ~XFS_MOUNT_IKEEP; + } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { + mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + } else if (!strcmp(this_char, MNTOPT_ATTR2)) { + mp->m_flags |= XFS_MOUNT_ATTR2; + } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; + } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { + mp->m_flags |= XFS_MOUNT_FILESTREAMS; + } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_QUOTA) || + !strcmp(this_char, MNTOPT_UQUOTA) || + !strcmp(this_char, MNTOPT_USRQUOTA)) { + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || + !strcmp(this_char, MNTOPT_UQUOTANOENF)) { + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_PQUOTA) || + !strcmp(this_char, MNTOPT_PRJQUOTA)) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_GQUOTA) || + !strcmp(this_char, MNTOPT_GRPQUOTA)) { + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); + } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; + } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) { + mp->m_flags |= XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) { + mp->m_flags &= ~XFS_MOUNT_DELAYLOG; + } else if (!strcmp(this_char, MNTOPT_DISCARD)) { + mp->m_flags |= XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, MNTOPT_NODISCARD)) { + mp->m_flags &= ~XFS_MOUNT_DISCARD; + } else if (!strcmp(this_char, "ihashsize")) { + xfs_warn(mp, + "ihashsize no longer used, option is deprecated."); + } else if (!strcmp(this_char, "osyncisdsync")) { + xfs_warn(mp, + "osyncisdsync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "osyncisosync")) { + xfs_warn(mp, + "osyncisosync has no effect, option is deprecated."); + } else if (!strcmp(this_char, "irixsgid")) { + xfs_warn(mp, + "irixsgid is now a sysctl(2) variable, option is deprecated."); + } else { + xfs_warn(mp, "unknown mount option [%s].", this_char); + return EINVAL; + } + } + + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { + xfs_warn(mp, + "sunit and swidth options incompatible with the noalign option"); + return EINVAL; + } + + if ((mp->m_flags & XFS_MOUNT_DISCARD) && + !(mp->m_flags & XFS_MOUNT_DELAYLOG)) { + xfs_warn(mp, + "the discard option is incompatible with the nodelaylog option"); + return EINVAL; + } + +#ifndef CONFIG_XFS_QUOTA + if (XFS_IS_QUOTA_RUNNING(mp)) { + xfs_warn(mp, "quota support not available in this kernel."); + return EINVAL; + } +#endif + + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && + (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { + xfs_warn(mp, "cannot mount with both project and group quota"); + return EINVAL; + } + + if ((dsunit && !dswidth) || (!dsunit && dswidth)) { + xfs_warn(mp, "sunit and swidth must be specified together"); + return EINVAL; + } + + if (dsunit && (dswidth % dsunit != 0)) { + xfs_warn(mp, + "stripe width (%d) must be a multiple of the stripe unit (%d)", + dswidth, dsunit); + return EINVAL; + } + +done: + if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ + if (dsunit) { + mp->m_dalign = dsunit; + mp->m_flags |= XFS_MOUNT_RETERR; + } + + if (dswidth) + mp->m_swidth = dswidth; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + xfs_warn(mp, + "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return XFS_ERROR(EINVAL); + } + + if (iosizelog) { + if (iosizelog > XFS_MAX_IO_LOG || + iosizelog < XFS_MIN_IO_LOG) { + xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", + iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = iosizelog; + mp->m_writeio_log = iosizelog; + } + + return 0; +} + +struct proc_xfs_info { + int flag; + char *str; +}; + +STATIC int +xfs_showargs( + struct xfs_mount *mp, + struct seq_file *m) +{ + static struct proc_xfs_info xfs_info_set[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP }, + { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC }, + { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN }, + { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC }, + { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID }, + { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY }, + { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 }, + { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM }, + { XFS_MOUNT_GRPID, "," MNTOPT_GRPID }, + { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG }, + { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD }, + { 0, NULL } + }; + static struct proc_xfs_info xfs_info_unset[] = { + /* the few simple ones we can get from the mount struct */ + { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO }, + { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER }, + { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE }, + { 0, NULL } + }; + struct proc_xfs_info *xfs_infop; + + for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { + if (mp->m_flags & xfs_infop->flag) + seq_puts(m, xfs_infop->str); + } + for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) { + if (!(mp->m_flags & xfs_infop->flag)) + seq_puts(m, xfs_infop->str); + } + + if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) + seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk", + (int)(1 << mp->m_writeio_log) >> 10); + + if (mp->m_logbufs > 0) + seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs); + if (mp->m_logbsize > 0) + seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10); + + if (mp->m_logname) + seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname); + if (mp->m_rtname) + seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname); + + if (mp->m_dalign > 0) + seq_printf(m, "," MNTOPT_SUNIT "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_dalign)); + if (mp->m_swidth > 0) + seq_printf(m, "," MNTOPT_SWIDTH "=%d", + (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); + + if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) + seq_puts(m, "," MNTOPT_USRQUOTA); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, "," MNTOPT_UQUOTANOENF); + + /* Either project or group quotas can be active, not both */ + + if (mp->m_qflags & XFS_PQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_PRJQUOTA); + else + seq_puts(m, "," MNTOPT_PQUOTANOENF); + } else if (mp->m_qflags & XFS_GQUOTA_ACCT) { + if (mp->m_qflags & XFS_OQUOTA_ENFD) + seq_puts(m, "," MNTOPT_GRPQUOTA); + else + seq_puts(m, "," MNTOPT_GQUOTANOENF); + } + + if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) + seq_puts(m, "," MNTOPT_NOQUOTA); + + return 0; +} +__uint64_t +xfs_max_file_offset( + unsigned int blockshift) +{ + unsigned int pagefactor = 1; + unsigned int bitshift = BITS_PER_LONG - 1; + + /* Figure out maximum filesize, on Linux this can depend on + * the filesystem blocksize (on 32 bit platforms). + * __block_write_begin does this in an [unsigned] long... + * page->index << (PAGE_CACHE_SHIFT - bbits) + * So, for page sized blocks (4K on 32 bit platforms), + * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is + * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) + * but for smaller blocksizes it is less (bbits = log2 bsize). + * Note1: get_block_t takes a long (implicit cast from above) + * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch + * can optionally convert the [unsigned] long from above into + * an [unsigned] long long. + */ + +#if BITS_PER_LONG == 32 +# if defined(CONFIG_LBDAF) + ASSERT(sizeof(sector_t) == 8); + pagefactor = PAGE_CACHE_SIZE; + bitshift = BITS_PER_LONG; +# else + pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift); +# endif +#endif + + return (((__uint64_t)pagefactor) << bitshift) - 1; +} + +STATIC int +xfs_blkdev_get( + xfs_mount_t *mp, + const char *name, + struct block_device **bdevp) +{ + int error = 0; + + *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL, + mp); + if (IS_ERR(*bdevp)) { + error = PTR_ERR(*bdevp); + xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error); + } + + return -error; +} + +STATIC void +xfs_blkdev_put( + struct block_device *bdev) +{ + if (bdev) + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); +} + +void +xfs_blkdev_issue_flush( + xfs_buftarg_t *buftarg) +{ + blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); +} + +STATIC void +xfs_close_devices( + struct xfs_mount *mp) +{ + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + xfs_free_buftarg(mp, mp->m_logdev_targp); + xfs_blkdev_put(logdev); + } + if (mp->m_rtdev_targp) { + struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + xfs_free_buftarg(mp, mp->m_rtdev_targp); + xfs_blkdev_put(rtdev); + } + xfs_free_buftarg(mp, mp->m_ddev_targp); +} + +/* + * The file system configurations are: + * (1) device (partition) with data and internal log + * (2) logical volume with data and log subvolumes. + * (3) logical volume with data, log, and realtime subvolumes. + * + * We only have to handle opening the log and realtime volumes here if + * they are present. The data subvolume has already been opened by + * get_sb_bdev() and is stored in sb->s_bdev. + */ +STATIC int +xfs_open_devices( + struct xfs_mount *mp) +{ + struct block_device *ddev = mp->m_super->s_bdev; + struct block_device *logdev = NULL, *rtdev = NULL; + int error; + + /* + * Open real time and log devices - order is important. + */ + if (mp->m_logname) { + error = xfs_blkdev_get(mp, mp->m_logname, &logdev); + if (error) + goto out; + } + + if (mp->m_rtname) { + error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); + if (error) + goto out_close_logdev; + + if (rtdev == ddev || rtdev == logdev) { + xfs_warn(mp, + "Cannot mount filesystem with identical rtdev and ddev/logdev."); + error = EINVAL; + goto out_close_rtdev; + } + } + + /* + * Setup xfs_mount buffer target pointers + */ + error = ENOMEM; + mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); + if (!mp->m_ddev_targp) + goto out_close_rtdev; + + if (rtdev) { + mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, + mp->m_fsname); + if (!mp->m_rtdev_targp) + goto out_free_ddev_targ; + } + + if (logdev && logdev != ddev) { + mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, + mp->m_fsname); + if (!mp->m_logdev_targp) + goto out_free_rtdev_targ; + } else { + mp->m_logdev_targp = mp->m_ddev_targp; + } + + return 0; + + out_free_rtdev_targ: + if (mp->m_rtdev_targp) + xfs_free_buftarg(mp, mp->m_rtdev_targp); + out_free_ddev_targ: + xfs_free_buftarg(mp, mp->m_ddev_targp); + out_close_rtdev: + if (rtdev) + xfs_blkdev_put(rtdev); + out_close_logdev: + if (logdev && logdev != ddev) + xfs_blkdev_put(logdev); + out: + return error; +} + +/* + * Setup xfs_mount buffer target pointers based on superblock + */ +STATIC int +xfs_setup_devices( + struct xfs_mount *mp) +{ + int error; + + error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + unsigned int log_sector_size = BBSIZE; + + if (xfs_sb_version_hassector(&mp->m_sb)) + log_sector_size = mp->m_sb.sb_logsectsize; + error = xfs_setsize_buftarg(mp->m_logdev_targp, + mp->m_sb.sb_blocksize, + log_sector_size); + if (error) + return error; + } + if (mp->m_rtdev_targp) { + error = xfs_setsize_buftarg(mp->m_rtdev_targp, + mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + } + + return 0; +} + +/* Catch misguided souls that try to use this interface on XFS */ +STATIC struct inode * +xfs_fs_alloc_inode( + struct super_block *sb) +{ + BUG(); + return NULL; +} + +/* + * Now that the generic code is guaranteed not to be accessing + * the linux inode, we can reclaim the inode. + */ +STATIC void +xfs_fs_destroy_inode( + struct inode *inode) +{ + struct xfs_inode *ip = XFS_I(inode); + + trace_xfs_destroy_inode(ip); + + XFS_STATS_INC(vn_reclaim); + + /* bad inode, get out here ASAP */ + if (is_bad_inode(inode)) + goto out_reclaim; + + xfs_ioend_wait(ip); + + ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); + + /* + * We should never get here with one of the reclaim flags already set. + */ + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); + ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); + + /* + * We always use background reclaim here because even if the + * inode is clean, it still may be under IO and hence we have + * to take the flush lock. The background reclaim path handles + * this more efficiently than we can here, so simply let background + * reclaim tear down all inodes. + */ +out_reclaim: + xfs_inode_set_reclaim_tag(ip); +} + +/* + * Slab object creation initialisation for the XFS inode. + * This covers only the idempotent fields in the XFS inode; + * all other fields need to be initialised on allocation + * from the slab. This avoids the need to repeatedly initialise + * fields in the xfs inode that left in the initialise state + * when freeing the inode. + */ +STATIC void +xfs_fs_inode_init_once( + void *inode) +{ + struct xfs_inode *ip = inode; + + memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ + atomic_set(&ip->i_iocount, 0); + atomic_set(&ip->i_pincount, 0); + spin_lock_init(&ip->i_flags_lock); + init_waitqueue_head(&ip->i_ipin_wait); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); +} + +/* + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that + * we catch unlogged VFS level updates to the inode. + * + * We need the barrier() to maintain correct ordering between unlogged + * updates and the transaction commit code that clears the i_update_core + * field. This requires all updates to be completed before marking the + * inode dirty. + */ +STATIC void +xfs_fs_dirty_inode( + struct inode *inode, + int flags) +{ + barrier(); + XFS_I(inode)->i_update_core = 1; +} + +STATIC int +xfs_log_inode( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0); + + if (error) { + xfs_trans_cancel(tp, 0); + /* we need to return with the lock hold shared */ + xfs_ilock(ip, XFS_ILOCK_SHARED); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + /* + * Note - it's possible that we might have pushed ourselves out of the + * way during trans_reserve which would flush the inode. But there's + * no guarantee that the inode buffer has actually gone out yet (it's + * delwri). Plus the buffer could be pinned anyway if it's part of + * an inode in another recent transaction. So we play it safe and + * fire off the transaction anyway. + */ + xfs_trans_ijoin(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + error = xfs_trans_commit(tp, 0); + xfs_ilock_demote(ip, XFS_ILOCK_EXCL); + + return error; +} + +STATIC int +xfs_fs_write_inode( + struct inode *inode, + struct writeback_control *wbc) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + int error = EAGAIN; + + trace_xfs_write_inode(ip); + + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + if (wbc->sync_mode == WB_SYNC_ALL) { + /* + * Make sure the inode has made it it into the log. Instead + * of forcing it all the way to stable storage using a + * synchronous transaction we let the log force inside the + * ->sync_fs call do that for thus, which reduces the number + * of synchronous log foces dramatically. + */ + xfs_ioend_wait(ip); + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (ip->i_update_core) { + error = xfs_log_inode(ip); + if (error) + goto out_unlock; + } + } else { + /* + * We make this non-blocking if the inode is contended, return + * EAGAIN to indicate to the caller that they did not succeed. + * This prevents the flush path from blocking on inodes inside + * another operation right now, they get caught later by + * xfs_sync. + */ + if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) + goto out; + + if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip)) + goto out_unlock; + + /* + * Now we have the flush lock and the inode is not pinned, we + * can check if the inode is really clean as we know that + * there are no pending transaction completions, it is not + * waiting on the delayed write queue and there is no IO in + * progress. + */ + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + error = 0; + goto out_unlock; + } + error = xfs_iflush(ip, SYNC_TRYLOCK); + } + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + out: + /* + * if we failed to write out the inode then mark + * it dirty again so we'll try again later. + */ + if (error) + xfs_mark_inode_dirty_sync(ip); + return -error; +} + +STATIC void +xfs_fs_evict_inode( + struct inode *inode) +{ + xfs_inode_t *ip = XFS_I(inode); + + trace_xfs_evict_inode(ip); + + truncate_inode_pages(&inode->i_data, 0); + end_writeback(inode); + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_DEC(vn_active); + + /* + * The iolock is used by the file system to coordinate reads, + * writes, and block truncates. Up to this point the lock + * protected concurrent accesses by users of the inode. But + * from here forward we're doing some final processing of the + * inode because we're done with it, and although we reuse the + * iolock for protection it is really a distinct lock class + * (in the lockdep sense) from before. To keep lockdep happy + * (and basically indicate what we are doing), we explicitly + * re-init the iolock here. + */ + ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); + lockdep_set_class_and_name(&ip->i_iolock.mr_lock, + &xfs_iolock_reclaimable, "xfs_iolock_reclaimable"); + + xfs_inactive(ip); +} + +STATIC void +xfs_free_fsname( + struct xfs_mount *mp) +{ + kfree(mp->m_fsname); + kfree(mp->m_rtname); + kfree(mp->m_logname); +} + +STATIC void +xfs_fs_put_super( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_syncd_stop(mp); + + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + + xfs_unmountfs(mp); + xfs_freesb(mp); + xfs_icsb_destroy_counters(mp); + xfs_close_devices(mp); + xfs_free_fsname(mp); + kfree(mp); +} + +STATIC int +xfs_fs_sync_fs( + struct super_block *sb, + int wait) +{ + struct xfs_mount *mp = XFS_M(sb); + int error; + + /* + * Not much we can do for the first async pass. Writing out the + * superblock would be counter-productive as we are going to redirty + * when writing out other data and metadata (and writing out a single + * block is quite fast anyway). + * + * Try to asynchronously kick off quota syncing at least. + */ + if (!wait) { + xfs_qm_sync(mp, SYNC_TRYLOCK); + return 0; + } + + error = xfs_quiesce_data(mp); + if (error) + return -error; + + if (laptop_mode) { + /* + * The disk must be active because we're syncing. + * We schedule xfssyncd now (now that the disk is + * active) instead of later (when it might not be). + */ + flush_delayed_work_sync(&mp->m_sync_work); + } + + return 0; +} + +STATIC int +xfs_fs_statfs( + struct dentry *dentry, + struct kstatfs *statp) +{ + struct xfs_mount *mp = XFS_M(dentry->d_sb); + xfs_sb_t *sbp = &mp->m_sb; + struct xfs_inode *ip = XFS_I(dentry->d_inode); + __uint64_t fakeinos, id; + xfs_extlen_t lsize; + __int64_t ffree; + + statp->f_type = XFS_SB_MAGIC; + statp->f_namelen = MAXNAMELEN - 1; + + id = huge_encode_dev(mp->m_ddev_targp->bt_dev); + statp->f_fsid.val[0] = (u32)id; + statp->f_fsid.val[1] = (u32)(id >> 32); + + xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); + + spin_lock(&mp->m_sb_lock); + statp->f_bsize = sbp->sb_blocksize; + lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; + statp->f_blocks = sbp->sb_dblocks - lsize; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); + fakeinos = statp->f_bfree << sbp->sb_inopblog; + statp->f_files = + MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER); + if (mp->m_maxicount) + statp->f_files = min_t(typeof(statp->f_files), + statp->f_files, + mp->m_maxicount); + + /* make sure statp->f_ffree does not underflow */ + ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); + statp->f_ffree = max_t(__int64_t, ffree, 0); + + spin_unlock(&mp->m_sb_lock); + + if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) || + ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) == + (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD)) + xfs_qm_statvfs(ip, statp); + return 0; +} + +STATIC void +xfs_save_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks = 0; + + mp->m_resblks_save = mp->m_resblks; + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC void +xfs_restore_resvblks(struct xfs_mount *mp) +{ + __uint64_t resblks; + + if (mp->m_resblks_save) { + resblks = mp->m_resblks_save; + mp->m_resblks_save = 0; + } else + resblks = xfs_default_resblks(mp); + + xfs_reserve_blocks(mp, &resblks, NULL); +} + +STATIC int +xfs_fs_remount( + struct super_block *sb, + int *flags, + char *options) +{ + struct xfs_mount *mp = XFS_M(sb); + substring_t args[MAX_OPT_ARGS]; + char *p; + int error; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_barrier: + mp->m_flags |= XFS_MOUNT_BARRIER; + break; + case Opt_nobarrier: + mp->m_flags &= ~XFS_MOUNT_BARRIER; + break; + default: + /* + * Logically we would return an error here to prevent + * users from believing they might have changed + * mount options using remount which can't be changed. + * + * But unfortunately mount(8) adds all options from + * mtab and fstab to the mount arguments in some cases + * so we can't blindly reject options, but have to + * check for each specified option if it actually + * differs from the currently set option and only + * reject it if that's the case. + * + * Until that is implemented we return success for + * every remount request, and silently ignore all + * options that we can't actually change. + */ +#if 0 + xfs_info(mp, + "mount option \"%s\" not supported for remount\n", p); + return -EINVAL; +#else + break; +#endif + } + } + + /* ro -> rw */ + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + mp->m_flags &= ~XFS_MOUNT_RDONLY; + + /* + * If this is the first remount to writeable state we + * might have some superblock changes to update. + */ + if (mp->m_update_flags) { + error = xfs_mount_log_sb(mp, mp->m_update_flags); + if (error) { + xfs_warn(mp, "failed to write sb changes"); + return error; + } + mp->m_update_flags = 0; + } + + /* + * Fill out the reserve pool if it is empty. Use the stashed + * value if it is non-zero, otherwise go with the default. + */ + xfs_restore_resvblks(mp); + } + + /* rw -> ro */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + /* + * After we have synced the data but before we sync the + * metadata, we need to free up the reserve block pool so that + * the used block count in the superblock on disk is correct at + * the end of the remount. Stash the current reserve pool size + * so that if we get remounted rw, we can return it to the same + * size. + */ + + xfs_quiesce_data(mp); + xfs_save_resvblks(mp); + xfs_quiesce_attr(mp); + mp->m_flags |= XFS_MOUNT_RDONLY; + } + + return 0; +} + +/* + * Second stage of a freeze. The data is already frozen so we only + * need to take care of the metadata. Once that's done write a dummy + * record to dirty the log in case of a crash while frozen. + */ +STATIC int +xfs_fs_freeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_save_resvblks(mp); + xfs_quiesce_attr(mp); + return -xfs_fs_log_dummy(mp); +} + +STATIC int +xfs_fs_unfreeze( + struct super_block *sb) +{ + struct xfs_mount *mp = XFS_M(sb); + + xfs_restore_resvblks(mp); + return 0; +} + +STATIC int +xfs_fs_show_options( + struct seq_file *m, + struct vfsmount *mnt) +{ + return -xfs_showargs(XFS_M(mnt->mnt_sb), m); +} + +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock _has_ now been read in. + */ +STATIC int +xfs_finish_flags( + struct xfs_mount *mp) +{ + int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); + + /* Fail a mount where the logbuf is smaller than the log stripe */ + if (xfs_sb_version_haslogv2(&mp->m_sb)) { + if (mp->m_logbsize <= 0 && + mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { + mp->m_logbsize = mp->m_sb.sb_logsunit; + } else if (mp->m_logbsize > 0 && + mp->m_logbsize < mp->m_sb.sb_logsunit) { + xfs_warn(mp, + "logbuf size must be greater than or equal to log stripe size"); + return XFS_ERROR(EINVAL); + } + } else { + /* Fail a mount if the logbuf is larger than 32K */ + if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { + xfs_warn(mp, + "logbuf size for version 1 logs must be 16K or 32K"); + return XFS_ERROR(EINVAL); + } + } + + /* + * mkfs'ed attr2 will turn on attr2 mount unless explicitly + * told by noattr2 to turn it off + */ + if (xfs_sb_version_hasattr2(&mp->m_sb) && + !(mp->m_flags & XFS_MOUNT_NOATTR2)) + mp->m_flags |= XFS_MOUNT_ATTR2; + + /* + * prohibit r/w mounts of read-only filesystems + */ + if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { + xfs_warn(mp, + "cannot mount a read-only filesystem as read-write"); + return XFS_ERROR(EROFS); + } + + return 0; +} + +STATIC int +xfs_fs_fill_super( + struct super_block *sb, + void *data, + int silent) +{ + struct inode *root; + struct xfs_mount *mp = NULL; + int flags = 0, error = ENOMEM; + + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); + if (!mp) + goto out; + + spin_lock_init(&mp->m_sb_lock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); + + mp->m_super = sb; + sb->s_fs_info = mp; + + error = xfs_parseargs(mp, (char *)data); + if (error) + goto out_free_fsname; + + sb_min_blocksize(sb, BBSIZE); + sb->s_xattr = xfs_xattr_handlers; + sb->s_export_op = &xfs_export_operations; +#ifdef CONFIG_XFS_QUOTA + sb->s_qcop = &xfs_quotactl_operations; +#endif + sb->s_op = &xfs_super_operations; + + if (silent) + flags |= XFS_MFSI_QUIET; + + error = xfs_open_devices(mp); + if (error) + goto out_free_fsname; + + error = xfs_icsb_init_counters(mp); + if (error) + goto out_close_devices; + + error = xfs_readsb(mp, flags); + if (error) + goto out_destroy_counters; + + error = xfs_finish_flags(mp); + if (error) + goto out_free_sb; + + error = xfs_setup_devices(mp); + if (error) + goto out_free_sb; + + error = xfs_filestream_mount(mp); + if (error) + goto out_free_sb; + + /* + * we must configure the block size in the superblock before we run the + * full mount process as the mount process can lookup and cache inodes. + * For the same reason we must also initialise the syncd and register + * the inode cache shrinker so that inodes can be reclaimed during + * operations like a quotacheck that iterate all inodes in the + * filesystem. + */ + sb->s_magic = XFS_SB_MAGIC; + sb->s_blocksize = mp->m_sb.sb_blocksize; + sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1; + sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); + sb->s_time_gran = 1; + set_posix_acl_flag(sb); + + error = xfs_mountfs(mp); + if (error) + goto out_filestream_unmount; + + error = xfs_syncd_init(mp); + if (error) + goto out_unmount; + + root = igrab(VFS_I(mp->m_rootip)); + if (!root) { + error = ENOENT; + goto out_syncd_stop; + } + if (is_bad_inode(root)) { + error = EINVAL; + goto out_syncd_stop; + } + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + error = ENOMEM; + goto out_iput; + } + + return 0; + + out_filestream_unmount: + xfs_filestream_unmount(mp); + out_free_sb: + xfs_freesb(mp); + out_destroy_counters: + xfs_icsb_destroy_counters(mp); + out_close_devices: + xfs_close_devices(mp); + out_free_fsname: + xfs_free_fsname(mp); + kfree(mp); + out: + return -error; + + out_iput: + iput(root); + out_syncd_stop: + xfs_syncd_stop(mp); + out_unmount: + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + + xfs_unmountfs(mp); + goto out_free_sb; +} + +STATIC struct dentry * +xfs_fs_mount( + struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); +} + +static int +xfs_fs_nr_cached_objects( + struct super_block *sb) +{ + return xfs_reclaim_inodes_count(XFS_M(sb)); +} + +static void +xfs_fs_free_cached_objects( + struct super_block *sb, + int nr_to_scan) +{ + xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan); +} + +static const struct super_operations xfs_super_operations = { + .alloc_inode = xfs_fs_alloc_inode, + .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, + .write_inode = xfs_fs_write_inode, + .evict_inode = xfs_fs_evict_inode, + .put_super = xfs_fs_put_super, + .sync_fs = xfs_fs_sync_fs, + .freeze_fs = xfs_fs_freeze, + .unfreeze_fs = xfs_fs_unfreeze, + .statfs = xfs_fs_statfs, + .remount_fs = xfs_fs_remount, + .show_options = xfs_fs_show_options, + .nr_cached_objects = xfs_fs_nr_cached_objects, + .free_cached_objects = xfs_fs_free_cached_objects, +}; + +static struct file_system_type xfs_fs_type = { + .owner = THIS_MODULE, + .name = "xfs", + .mount = xfs_fs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +STATIC int __init +xfs_init_zones(void) +{ + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out; + + xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_destroy_ioend_zone; + + xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), + "xfs_log_ticket"); + if (!xfs_log_ticket_zone) + goto out_destroy_ioend_pool; + + xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), + "xfs_bmap_free_item"); + if (!xfs_bmap_free_item_zone) + goto out_destroy_log_ticket_zone; + + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), + "xfs_btree_cur"); + if (!xfs_btree_cur_zone) + goto out_destroy_bmap_free_item_zone; + + xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), + "xfs_da_state"); + if (!xfs_da_state_zone) + goto out_destroy_btree_cur_zone; + + xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); + if (!xfs_dabuf_zone) + goto out_destroy_da_state_zone; + + xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + if (!xfs_ifork_zone) + goto out_destroy_dabuf_zone; + + xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); + if (!xfs_trans_zone) + goto out_destroy_ifork_zone; + + xfs_log_item_desc_zone = + kmem_zone_init(sizeof(struct xfs_log_item_desc), + "xfs_log_item_desc"); + if (!xfs_log_item_desc_zone) + goto out_destroy_trans_zone; + + /* + * The size of the zone allocated buf log item is the maximum + * size possible under XFS. This wastes a little bit of memory, + * but it is much faster. + */ + xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + + (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / + NBWORD) * sizeof(int))), "xfs_buf_item"); + if (!xfs_buf_item_zone) + goto out_destroy_log_item_desc_zone; + + xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efd_item"); + if (!xfs_efd_zone) + goto out_destroy_buf_item_zone; + + xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + + ((XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efi_item"); + if (!xfs_efi_zone) + goto out_destroy_efd_zone; + + xfs_inode_zone = + kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); + if (!xfs_inode_zone) + goto out_destroy_efi_zone; + + xfs_ili_zone = + kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", + KM_ZONE_SPREAD, NULL); + if (!xfs_ili_zone) + goto out_destroy_inode_zone; + + return 0; + + out_destroy_inode_zone: + kmem_zone_destroy(xfs_inode_zone); + out_destroy_efi_zone: + kmem_zone_destroy(xfs_efi_zone); + out_destroy_efd_zone: + kmem_zone_destroy(xfs_efd_zone); + out_destroy_buf_item_zone: + kmem_zone_destroy(xfs_buf_item_zone); + out_destroy_log_item_desc_zone: + kmem_zone_destroy(xfs_log_item_desc_zone); + out_destroy_trans_zone: + kmem_zone_destroy(xfs_trans_zone); + out_destroy_ifork_zone: + kmem_zone_destroy(xfs_ifork_zone); + out_destroy_dabuf_zone: + kmem_zone_destroy(xfs_dabuf_zone); + out_destroy_da_state_zone: + kmem_zone_destroy(xfs_da_state_zone); + out_destroy_btree_cur_zone: + kmem_zone_destroy(xfs_btree_cur_zone); + out_destroy_bmap_free_item_zone: + kmem_zone_destroy(xfs_bmap_free_item_zone); + out_destroy_log_ticket_zone: + kmem_zone_destroy(xfs_log_ticket_zone); + out_destroy_ioend_pool: + mempool_destroy(xfs_ioend_pool); + out_destroy_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_zones(void) +{ + kmem_zone_destroy(xfs_ili_zone); + kmem_zone_destroy(xfs_inode_zone); + kmem_zone_destroy(xfs_efi_zone); + kmem_zone_destroy(xfs_efd_zone); + kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_log_item_desc_zone); + kmem_zone_destroy(xfs_trans_zone); + kmem_zone_destroy(xfs_ifork_zone); + kmem_zone_destroy(xfs_dabuf_zone); + kmem_zone_destroy(xfs_da_state_zone); + kmem_zone_destroy(xfs_btree_cur_zone); + kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_zone_destroy(xfs_log_ticket_zone); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_ioend_zone); + +} + +STATIC int __init +xfs_init_workqueues(void) +{ + /* + * max_active is set to 8 to give enough concurency to allow + * multiple work operations on each CPU to run. This allows multiple + * filesystems to be running sync work concurrently, and scales with + * the number of CPUs in the system. + */ + xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8); + if (!xfs_syncd_wq) + goto out; + + xfs_ail_wq = alloc_workqueue("xfsail", WQ_CPU_INTENSIVE, 8); + if (!xfs_ail_wq) + goto out_destroy_syncd; + + return 0; + +out_destroy_syncd: + destroy_workqueue(xfs_syncd_wq); +out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_workqueues(void) +{ + destroy_workqueue(xfs_ail_wq); + destroy_workqueue(xfs_syncd_wq); +} + +STATIC int __init +init_xfs_fs(void) +{ + int error; + + printk(KERN_INFO XFS_VERSION_STRING " with " + XFS_BUILD_OPTIONS " enabled\n"); + + xfs_ioend_init(); + xfs_dir_startup(); + + error = xfs_init_zones(); + if (error) + goto out; + + error = xfs_init_workqueues(); + if (error) + goto out_destroy_zones; + + error = xfs_mru_cache_init(); + if (error) + goto out_destroy_wq; + + error = xfs_filestream_init(); + if (error) + goto out_mru_cache_uninit; + + error = xfs_buf_init(); + if (error) + goto out_filestream_uninit; + + error = xfs_init_procfs(); + if (error) + goto out_buf_terminate; + + error = xfs_sysctl_register(); + if (error) + goto out_cleanup_procfs; + + vfs_initquota(); + + error = register_filesystem(&xfs_fs_type); + if (error) + goto out_sysctl_unregister; + return 0; + + out_sysctl_unregister: + xfs_sysctl_unregister(); + out_cleanup_procfs: + xfs_cleanup_procfs(); + out_buf_terminate: + xfs_buf_terminate(); + out_filestream_uninit: + xfs_filestream_uninit(); + out_mru_cache_uninit: + xfs_mru_cache_uninit(); + out_destroy_wq: + xfs_destroy_workqueues(); + out_destroy_zones: + xfs_destroy_zones(); + out: + return error; +} + +STATIC void __exit +exit_xfs_fs(void) +{ + vfs_exitquota(); + unregister_filesystem(&xfs_fs_type); + xfs_sysctl_unregister(); + xfs_cleanup_procfs(); + xfs_buf_terminate(); + xfs_filestream_uninit(); + xfs_mru_cache_uninit(); + xfs_destroy_workqueues(); + xfs_destroy_zones(); +} + +module_init(init_xfs_fs); +module_exit(exit_xfs_fs); + +MODULE_AUTHOR("Silicon Graphics, Inc."); +MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); +MODULE_LICENSE("GPL"); diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h new file mode 100644 index 000000000000..50a3266c999e --- /dev/null +++ b/fs/xfs/xfs_super.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SUPER_H__ +#define __XFS_SUPER_H__ + +#include + +#ifdef CONFIG_XFS_QUOTA +extern void xfs_qm_init(void); +extern void xfs_qm_exit(void); +# define vfs_initquota() xfs_qm_init() +# define vfs_exitquota() xfs_qm_exit() +#else +# define vfs_initquota() do { } while (0) +# define vfs_exitquota() do { } while (0) +#endif + +#ifdef CONFIG_XFS_POSIX_ACL +# define XFS_ACL_STRING "ACLs, " +# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +#else +# define XFS_ACL_STRING +# define set_posix_acl_flag(sb) do { } while (0) +#endif + +#define XFS_SECURITY_STRING "security attributes, " + +#ifdef CONFIG_XFS_RT +# define XFS_REALTIME_STRING "realtime, " +#else +# define XFS_REALTIME_STRING +#endif + +#if XFS_BIG_BLKNOS +# if XFS_BIG_INUMS +# define XFS_BIGFS_STRING "large block/inode numbers, " +# else +# define XFS_BIGFS_STRING "large block numbers, " +# endif +#else +# define XFS_BIGFS_STRING +#endif + +#ifdef DEBUG +# define XFS_DBG_STRING "debug" +#else +# define XFS_DBG_STRING "no debug" +#endif + +#define XFS_VERSION_STRING "SGI XFS" +#define XFS_BUILD_OPTIONS XFS_ACL_STRING \ + XFS_SECURITY_STRING \ + XFS_REALTIME_STRING \ + XFS_BIGFS_STRING \ + XFS_DBG_STRING /* DBG must be last */ + +struct xfs_inode; +struct xfs_mount; +struct xfs_buftarg; +struct block_device; + +extern __uint64_t xfs_max_file_offset(unsigned int); + +extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); + +extern const struct export_operations xfs_export_operations; +extern const struct xattr_handler *xfs_xattr_handlers[]; +extern const struct quotactl_ops xfs_quotactl_operations; + +#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) + +#endif /* __XFS_SUPER_H__ */ diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c new file mode 100644 index 000000000000..4604f90f86a3 --- /dev/null +++ b/fs/xfs/xfs_sync.c @@ -0,0 +1,1065 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_dinode.h" +#include "xfs_error.h" +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" +#include "xfs_inode_item.h" +#include "xfs_quota.h" +#include "xfs_trace.h" +#include "xfs_fsops.h" + +#include +#include + +struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + +/* + * The inode lookup is done in batches to keep the amount of lock traffic and + * radix tree lookups to a minimum. The batch size is a trade off between + * lookup reduction and stack usage. This is in the reclaim path, so we can't + * be too greedy. + */ +#define XFS_LOOKUP_BATCH 32 + +STATIC int +xfs_inode_ag_walk_grab( + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + + ASSERT(rcu_read_lock_held()); + + /* + * check for stale RCU freed inode + * + * If the inode has been reallocated, it doesn't matter if it's not in + * the AG we are walking - we are walking for writeback, so if it + * passes all the "valid inode" checks and is dirty, then we'll write + * it back anyway. If it has been reallocated and still being + * initialised, the XFS_INEW check below will catch it. + */ + spin_lock(&ip->i_flags_lock); + if (!ip->i_ino) + goto out_unlock_noent; + + /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ + if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) + goto out_unlock_noent; + spin_unlock(&ip->i_flags_lock); + + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return EFSCORRUPTED; + + /* If we can't grab the inode, it must on it's way to reclaim. */ + if (!igrab(inode)) + return ENOENT; + + if (is_bad_inode(inode)) { + IRELE(ip); + return ENOENT; + } + + /* inode is valid */ + return 0; + +out_unlock_noent: + spin_unlock(&ip->i_flags_lock); + return ENOENT; +} + +STATIC int +xfs_inode_ag_walk( + struct xfs_mount *mp, + struct xfs_perag *pag, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags) +{ + uint32_t first_index; + int last_error = 0; + int skipped; + int done; + int nr_found; + +restart: + done = 0; + skipped = 0; + first_index = 0; + nr_found = 0; + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int error = 0; + int i; + + rcu_read_lock(); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH); + if (!nr_found) { + rcu_read_unlock(); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_inode_ag_walk_grab(ip)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can occur if + * we have inodes in the last block of the AG and we + * are currently pointing to the last inode. + * + * Because we may see inodes that are from the wrong AG + * due to RCU freeing and reallocation, only update the + * index if it lies in this AG. It was a race that lead + * us to see this inode, so another lookup from the + * same index will not find it again. + */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno) + continue; + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + rcu_read_unlock(); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = execute(batch[i], pag, flags); + IRELE(batch[i]); + if (error == EAGAIN) { + skipped++; + continue; + } + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + /* bail out if the filesystem is corrupted. */ + if (error == EFSCORRUPTED) + break; + + cond_resched(); + + } while (nr_found && !done); + + if (skipped) { + delay(1); + goto restart; + } + return last_error; +} + +int +xfs_inode_ag_iterator( + struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, + struct xfs_perag *pag, int flags), + int flags) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + + ag = 0; + while ((pag = xfs_perag_get(mp, ag))) { + ag = pag->pag_agno + 1; + error = xfs_inode_ag_walk(mp, pag, execute, flags); + xfs_perag_put(pag); + if (error) { + last_error = error; + if (error == EFSCORRUPTED) + break; + } + } + return XFS_ERROR(last_error); +} + +STATIC int +xfs_sync_inode_data( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + struct inode *inode = VFS_I(ip); + struct address_space *mapping = inode->i_mapping; + int error = 0; + + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) + goto out_wait; + + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (flags & SYNC_TRYLOCK) + goto out_wait; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } + + error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ? + 0 : XBF_ASYNC, FI_NONE); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + + out_wait: + if (flags & SYNC_WAIT) + xfs_ioend_wait(ip); + return error; +} + +STATIC int +xfs_sync_inode_attr( + struct xfs_inode *ip, + struct xfs_perag *pag, + int flags) +{ + int error = 0; + + xfs_ilock(ip, XFS_ILOCK_SHARED); + if (xfs_inode_clean(ip)) + goto out_unlock; + if (!xfs_iflock_nowait(ip)) { + if (!(flags & SYNC_WAIT)) + goto out_unlock; + xfs_iflock(ip); + } + + if (xfs_inode_clean(ip)) { + xfs_ifunlock(ip); + goto out_unlock; + } + + error = xfs_iflush(ip, flags); + + /* + * We don't want to try again on non-blocking flushes that can't run + * again immediately. If an inode really must be written, then that's + * what the SYNC_WAIT flag is for. + */ + if (error == EAGAIN) { + ASSERT(!(flags & SYNC_WAIT)); + error = 0; + } + + out_unlock: + xfs_iunlock(ip, XFS_ILOCK_SHARED); + return error; +} + +/* + * Write out pagecache data for the whole filesystem. + */ +STATIC int +xfs_sync_data( + struct xfs_mount *mp, + int flags) +{ + int error; + + ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); + + error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); + if (error) + return XFS_ERROR(error); + + xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0); + return 0; +} + +/* + * Write out inode metadata (attributes) for the whole filesystem. + */ +STATIC int +xfs_sync_attr( + struct xfs_mount *mp, + int flags) +{ + ASSERT((flags & ~SYNC_WAIT) == 0); + + return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); +} + +STATIC int +xfs_sync_fsdata( + struct xfs_mount *mp) +{ + struct xfs_buf *bp; + + /* + * If the buffer is pinned then push on the log so we won't get stuck + * waiting in the write for someone, maybe ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have the + * superblock buffer locked at that point so it can become pinned in + * between there and here. + */ + bp = xfs_getsb(mp, 0); + if (xfs_buf_ispinned(bp)) + xfs_log_force(mp, 0); + + return xfs_bwrite(mp, bp); +} + +/* + * When remounting a filesystem read-only or freezing the filesystem, we have + * two phases to execute. This first phase is syncing the data before we + * quiesce the filesystem, and the second is flushing all the inodes out after + * we've waited for all the transactions created by the first phase to + * complete. The second phase ensures that the inodes are written to their + * location on disk rather than just existing in transactions in the log. This + * means after a quiesce there is no log replay required to write the inodes to + * disk (this is the main difference between a sync and a quiesce). + */ +/* + * First stage of freeze - no writers will make progress now we are here, + * so we flush delwri and delalloc buffers here, then wait for all I/O to + * complete. Data is frozen at that point. Metadata is not frozen, + * transactions can still occur here so don't bother flushing the buftarg + * because it'll just get dirty again. + */ +int +xfs_quiesce_data( + struct xfs_mount *mp) +{ + int error, error2 = 0; + + xfs_qm_sync(mp, SYNC_TRYLOCK); + xfs_qm_sync(mp, SYNC_WAIT); + + /* force out the newly dirtied log buffers */ + xfs_log_force(mp, XFS_LOG_SYNC); + + /* write superblock and hoover up shutdown errors */ + error = xfs_sync_fsdata(mp); + + /* make sure all delwri buffers are written out */ + xfs_flush_buftarg(mp->m_ddev_targp, 1); + + /* mark the log as covered if needed */ + if (xfs_log_need_covered(mp)) + error2 = xfs_fs_log_dummy(mp); + + /* flush data-only devices */ + if (mp->m_rtdev_targp) + XFS_bflush(mp->m_rtdev_targp); + + return error ? error : error2; +} + +STATIC void +xfs_quiesce_fs( + struct xfs_mount *mp) +{ + int count = 0, pincount; + + xfs_reclaim_inodes(mp, 0); + xfs_flush_buftarg(mp->m_ddev_targp, 0); + + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. We also so sync + * reclaim of inodes to catch any that the above delwri flush skipped. + */ + do { + xfs_reclaim_inodes(mp, SYNC_WAIT); + xfs_sync_attr(mp, SYNC_WAIT); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); +} + +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceeding. + */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + /* + * Just warn here till VFS can correctly support + * read-only remount without racing. + */ + WARN_ON(atomic_read(&mp->m_active_trans) != 0); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp); + if (error) + xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + +static void +xfs_syncd_queue_sync( + struct xfs_mount *mp) +{ + queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); +} + +/* + * Every sync period we need to unpin all items, reclaim inodes and sync + * disk quotas. We might need to cover the log to indicate that the + * filesystem is idle and not frozen. + */ +STATIC void +xfs_sync_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_sync_work); + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + /* dgc: errors ignored here */ + if (mp->m_super->s_frozen == SB_UNFROZEN && + xfs_log_need_covered(mp)) + error = xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + error = xfs_qm_sync(mp, SYNC_TRYLOCK); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); + } + + /* queue us up again */ + xfs_syncd_queue_sync(mp); +} + +/* + * Queue a new inode reclaim pass if there are reclaimable inodes and there + * isn't a reclaim pass already in progress. By default it runs every 5s based + * on the xfs syncd work default of 30s. Perhaps this should have it's own + * tunable, but that can be done if this method proves to be ineffective or too + * aggressive. + */ +static void +xfs_syncd_queue_reclaim( + struct xfs_mount *mp) +{ + + /* + * We can have inodes enter reclaim after we've shut down the syncd + * workqueue during unmount, so don't allow reclaim work to be queued + * during unmount. + */ + if (!(mp->m_super->s_flags & MS_ACTIVE)) + return; + + rcu_read_lock(); + if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) { + queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work, + msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); + } + rcu_read_unlock(); +} + +/* + * This is a fast pass over the inode cache to try to get reclaim moving on as + * many inodes as possible in a short period of time. It kicks itself every few + * seconds, as well as being kicked by the inode cache shrinker when memory + * goes low. It scans as quickly as possible avoiding locked inodes or those + * already being flushed, and once done schedules a future pass. + */ +STATIC void +xfs_reclaim_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(to_delayed_work(work), + struct xfs_mount, m_reclaim_work); + + xfs_reclaim_inodes(mp, SYNC_TRYLOCK); + xfs_syncd_queue_reclaim(mp); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room. + * + * Queue a new data flush if there isn't one already in progress and + * wait for completion of the flush. This means that we only ever have one + * inode flush in progress no matter how many ENOSPC events are occurring and + * so will prevent the system from bogging down due to every concurrent + * ENOSPC event scanning all the active inodes in the system for writeback. + */ +void +xfs_flush_inodes( + struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + queue_work(xfs_syncd_wq, &mp->m_flush_work); + flush_work_sync(&mp->m_flush_work); +} + +STATIC void +xfs_flush_worker( + struct work_struct *work) +{ + struct xfs_mount *mp = container_of(work, + struct xfs_mount, m_flush_work); + + xfs_sync_data(mp, SYNC_TRYLOCK); + xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT); +} + +int +xfs_syncd_init( + struct xfs_mount *mp) +{ + INIT_WORK(&mp->m_flush_work, xfs_flush_worker); + INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker); + INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); + + xfs_syncd_queue_sync(mp); + xfs_syncd_queue_reclaim(mp); + + return 0; +} + +void +xfs_syncd_stop( + struct xfs_mount *mp) +{ + cancel_delayed_work_sync(&mp->m_sync_work); + cancel_delayed_work_sync(&mp->m_reclaim_work); + cancel_work_sync(&mp->m_flush_work); +} + +void +__xfs_inode_set_reclaim_tag( + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + + if (!pag->pag_ici_reclaimable) { + /* propagate the reclaim tag up into the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_set(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + + /* schedule periodic background inode reclaim */ + xfs_syncd_queue_reclaim(ip->i_mount); + + trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } + pag->pag_ici_reclaimable++; +} + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; + + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); + spin_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_set_reclaim_tag(pag, ip); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + spin_unlock(&ip->i_flags_lock); + spin_unlock(&pag->pag_ici_lock); + xfs_perag_put(pag); +} + +STATIC void +__xfs_inode_clear_reclaim( + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + pag->pag_ici_reclaimable--; + if (!pag->pag_ici_reclaimable) { + /* clear the reclaim tag from the perag radix tree */ + spin_lock(&ip->i_mount->m_perag_lock); + radix_tree_tag_clear(&ip->i_mount->m_perag_tree, + XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino), + XFS_ICI_RECLAIM_TAG); + spin_unlock(&ip->i_mount->m_perag_lock); + trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno, + -1, _RET_IP_); + } +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_inode_clear_reclaim(pag, ip); +} + +/* + * Grab the inode for reclaim exclusively. + * Return 0 if we grabbed it, non-zero otherwise. + */ +STATIC int +xfs_reclaim_inode_grab( + struct xfs_inode *ip, + int flags) +{ + ASSERT(rcu_read_lock_held()); + + /* quick check for stale RCU freed inode */ + if (!ip->i_ino) + return 1; + + /* + * do some unlocked checks first to avoid unnecessary lock traffic. + * The first is a flush lock check, the second is a already in reclaim + * check. Only do these checks if we are not going to block on locks. + */ + if ((flags & SYNC_TRYLOCK) && + (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { + return 1; + } + + /* + * The radix tree lock here protects a thread in xfs_iget from racing + * with us starting reclaim on the inode. Once we have the + * XFS_IRECLAIM flag set it will not touch us. + * + * Due to RCU lookup, we may find inodes that have been freed and only + * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that + * aren't candidates for reclaim at all, so we must check the + * XFS_IRECLAIMABLE is set first before proceeding to reclaim. + */ + spin_lock(&ip->i_flags_lock); + if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) || + __xfs_iflags_test(ip, XFS_IRECLAIM)) { + /* not a reclaim candidate. */ + spin_unlock(&ip->i_flags_lock); + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + return 0; +} + +/* + * Inodes in different states need to be treated differently, and the return + * value of xfs_iflush is not sufficient to get this right. The following table + * lists the inode states and the reclaim actions necessary for non-blocking + * reclaim: + * + * + * inode state iflush ret required action + * --------------- ---------- --------------- + * bad - reclaim + * shutdown EIO unpin and reclaim + * clean, unpinned 0 reclaim + * stale, unpinned 0 reclaim + * clean, pinned(*) 0 requeue + * stale, pinned EAGAIN requeue + * dirty, delwri ok 0 requeue + * dirty, delwri blocked EAGAIN requeue + * dirty, sync flush 0 reclaim + * + * (*) dgc: I don't think the clean, pinned state is possible but it gets + * handled anyway given the order of checks implemented. + * + * As can be seen from the table, the return value of xfs_iflush() is not + * sufficient to correctly decide the reclaim action here. The checks in + * xfs_iflush() might look like duplicates, but they are not. + * + * Also, because we get the flush lock first, we know that any inode that has + * been flushed delwri has had the flush completed by the time we check that + * the inode is clean. The clean inode check needs to be done before flushing + * the inode delwri otherwise we would loop forever requeuing clean inodes as + * we cannot tell apart a successful delwri flush and a clean inode from the + * return value of xfs_iflush(). + * + * Note that because the inode is flushed delayed write by background + * writeback, the flush lock may already be held here and waiting on it can + * result in very long latencies. Hence for sync reclaims, where we wait on the + * flush lock, the caller should push out delayed write inodes first before + * trying to reclaim them to minimise the amount of time spent waiting. For + * background relaim, we just requeue the inode for the next pass. + * + * Hence the order of actions after gaining the locks should be: + * bad => reclaim + * shutdown => unpin and reclaim + * pinned, delwri => requeue + * pinned, sync => unpin + * stale => reclaim + * clean => reclaim + * dirty, delwri => flush and requeue + * dirty, sync => flush, wait and reclaim + */ +STATIC int +xfs_reclaim_inode( + struct xfs_inode *ip, + struct xfs_perag *pag, + int sync_mode) +{ + int error; + +restart: + error = 0; + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (!xfs_iflock_nowait(ip)) { + if (!(sync_mode & SYNC_WAIT)) + goto out; + xfs_iflock(ip); + } + + if (is_bad_inode(VFS_I(ip))) + goto reclaim; + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_iunpin_wait(ip); + goto reclaim; + } + if (xfs_ipincount(ip)) { + if (!(sync_mode & SYNC_WAIT)) { + xfs_ifunlock(ip); + goto out; + } + xfs_iunpin_wait(ip); + } + if (xfs_iflags_test(ip, XFS_ISTALE)) + goto reclaim; + if (xfs_inode_clean(ip)) + goto reclaim; + + /* + * Now we have an inode that needs flushing. + * + * We do a nonblocking flush here even if we are doing a SYNC_WAIT + * reclaim as we can deadlock with inode cluster removal. + * xfs_ifree_cluster() can lock the inode buffer before it locks the + * ip->i_lock, and we are doing the exact opposite here. As a result, + * doing a blocking xfs_itobp() to get the cluster buffer will result + * in an ABBA deadlock with xfs_ifree_cluster(). + * + * As xfs_ifree_cluser() must gather all inodes that are active in the + * cache to mark them stale, if we hit this case we don't actually want + * to do IO here - we want the inode marked stale so we can simply + * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush, + * just unlock the inode, back off and try again. Hopefully the next + * pass through will see the stale flag set on the inode. + */ + error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode); + if (sync_mode & SYNC_WAIT) { + if (error == EAGAIN) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* backoff longer than in xfs_ifree_cluster */ + delay(2); + goto restart; + } + xfs_iflock(ip); + goto reclaim; + } + + /* + * When we have to flush an inode but don't have SYNC_WAIT set, we + * flush the inode out using a delwri buffer and wait for the next + * call into reclaim to find it in a clean state instead of waiting for + * it now. We also don't return errors here - if the error is transient + * then the next reclaim pass will flush the inode, and if the error + * is permanent then the next sync reclaim will reclaim the inode and + * pass on the error. + */ + if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) { + xfs_warn(ip->i_mount, + "inode 0x%llx background reclaim flush failed with %d", + (long long)ip->i_ino, error); + } +out: + xfs_iflags_clear(ip, XFS_IRECLAIM); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* + * We could return EAGAIN here to make reclaim rescan the inode tree in + * a short while. However, this just burns CPU time scanning the tree + * waiting for IO to complete and xfssyncd never goes back to the idle + * state. Instead, return 0 to let the next scheduled background reclaim + * attempt to reclaim the inode again. + */ + return 0; + +reclaim: + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + XFS_STATS_INC(xs_ig_reclaims); + /* + * Remove the inode from the per-AG radix tree. + * + * Because radix_tree_delete won't complain even if the item was never + * added to the tree assert that it's been there before to catch + * problems with the inode life time early on. + */ + spin_lock(&pag->pag_ici_lock); + if (!radix_tree_delete(&pag->pag_ici_root, + XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) + ASSERT(0); + __xfs_inode_clear_reclaim(pag, ip); + spin_unlock(&pag->pag_ici_lock); + + /* + * Here we do an (almost) spurious inode lock in order to coordinate + * with inode cache radix tree lookups. This is because the lookup + * can reference the inodes in the cache without taking references. + * + * We make that OK here by ensuring that we wait until the inode is + * unlocked after the lookup before we go ahead and free it. We get + * both the ilock and the iolock because the code may need to drop the + * ilock one but will still hold the iolock. + */ + xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + xfs_qm_dqdetach(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); + + xfs_inode_free(ip); + return error; + +} + +/* + * Walk the AGs and reclaim the inodes in them. Even if the filesystem is + * corrupted, we still want to try to reclaim all the inodes. If we don't, + * then a shut down during filesystem unmount reclaim walk leak all the + * unreclaimed inodes. + */ +int +xfs_reclaim_inodes_ag( + struct xfs_mount *mp, + int flags, + int *nr_to_scan) +{ + struct xfs_perag *pag; + int error = 0; + int last_error = 0; + xfs_agnumber_t ag; + int trylock = flags & SYNC_TRYLOCK; + int skipped; + +restart: + ag = 0; + skipped = 0; + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + unsigned long first_index = 0; + int done = 0; + int nr_found = 0; + + ag = pag->pag_agno + 1; + + if (trylock) { + if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { + skipped++; + xfs_perag_put(pag); + continue; + } + first_index = pag->pag_ici_reclaim_cursor; + } else + mutex_lock(&pag->pag_ici_reclaim_lock); + + do { + struct xfs_inode *batch[XFS_LOOKUP_BATCH]; + int i; + + rcu_read_lock(); + nr_found = radix_tree_gang_lookup_tag( + &pag->pag_ici_root, + (void **)batch, first_index, + XFS_LOOKUP_BATCH, + XFS_ICI_RECLAIM_TAG); + if (!nr_found) { + done = 1; + rcu_read_unlock(); + break; + } + + /* + * Grab the inodes before we drop the lock. if we found + * nothing, nr == 0 and the loop will be skipped. + */ + for (i = 0; i < nr_found; i++) { + struct xfs_inode *ip = batch[i]; + + if (done || xfs_reclaim_inode_grab(ip, flags)) + batch[i] = NULL; + + /* + * Update the index for the next lookup. Catch + * overflows into the next AG range which can + * occur if we have inodes in the last block of + * the AG and we are currently pointing to the + * last inode. + * + * Because we may see inodes that are from the + * wrong AG due to RCU freeing and + * reallocation, only update the index if it + * lies in this AG. It was a race that lead us + * to see this inode, so another lookup from + * the same index will not find it again. + */ + if (XFS_INO_TO_AGNO(mp, ip->i_ino) != + pag->pag_agno) + continue; + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) + done = 1; + } + + /* unlock now we've grabbed the inodes. */ + rcu_read_unlock(); + + for (i = 0; i < nr_found; i++) { + if (!batch[i]) + continue; + error = xfs_reclaim_inode(batch[i], pag, flags); + if (error && last_error != EFSCORRUPTED) + last_error = error; + } + + *nr_to_scan -= XFS_LOOKUP_BATCH; + + cond_resched(); + + } while (nr_found && !done && *nr_to_scan > 0); + + if (trylock && !done) + pag->pag_ici_reclaim_cursor = first_index; + else + pag->pag_ici_reclaim_cursor = 0; + mutex_unlock(&pag->pag_ici_reclaim_lock); + xfs_perag_put(pag); + } + + /* + * if we skipped any AG, and we still have scan count remaining, do + * another pass this time using blocking reclaim semantics (i.e + * waiting on the reclaim locks and ignoring the reclaim cursors). This + * ensure that when we get more reclaimers than AGs we block rather + * than spin trying to execute reclaim. + */ + if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) { + trylock = 0; + goto restart; + } + return XFS_ERROR(last_error); +} + +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int mode) +{ + int nr_to_scan = INT_MAX; + + return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); +} + +/* + * Scan a certain number of inodes for reclaim. + * + * When called we make sure that there is a background (fast) inode reclaim in + * progress, while we will throttle the speed of reclaim via doing synchronous + * reclaim of inodes. That means if we come across dirty inodes, we wait for + * them to be cleaned, which we hope will not be very long due to the + * background walker having already kicked the IO off on those dirty inodes. + */ +void +xfs_reclaim_inodes_nr( + struct xfs_mount *mp, + int nr_to_scan) +{ + /* kick background reclaimer and push the AIL */ + xfs_syncd_queue_reclaim(mp); + xfs_ail_push_all(mp->m_ail); + + xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan); +} + +/* + * Return the number of reclaimable inodes in the filesystem for + * the shrinker to determine how much to reclaim. + */ +int +xfs_reclaim_inodes_count( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t ag = 0; + int reclaimable = 0; + + while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { + ag = pag->pag_agno + 1; + reclaimable += pag->pag_ici_reclaimable; + xfs_perag_put(pag); + } + return reclaimable; +} + diff --git a/fs/xfs/xfs_sync.h b/fs/xfs/xfs_sync.h new file mode 100644 index 000000000000..941202e7ac6e --- /dev/null +++ b/fs/xfs/xfs_sync.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef XFS_SYNC_H +#define XFS_SYNC_H 1 + +struct xfs_mount; +struct xfs_perag; + +#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ +#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ + +extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */ + +int xfs_syncd_init(struct xfs_mount *mp); +void xfs_syncd_stop(struct xfs_mount *mp); + +int xfs_quiesce_data(struct xfs_mount *mp); +void xfs_quiesce_attr(struct xfs_mount *mp); + +void xfs_flush_inodes(struct xfs_inode *ip); + +int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); +int xfs_reclaim_inodes_count(struct xfs_mount *mp); +void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan); + +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); + +int xfs_sync_inode_grab(struct xfs_inode *ip); +int xfs_inode_ag_iterator(struct xfs_mount *mp, + int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), + int flags); + +#endif diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c new file mode 100644 index 000000000000..ee2d2adaa438 --- /dev/null +++ b/fs/xfs/xfs_sysctl.c @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2001-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include +#include +#include "xfs_error.h" + +static struct ctl_table_header *xfs_table_header; + +#ifdef CONFIG_PROC_FS +STATIC int +xfs_stats_clear_proc_handler( + ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int c, ret, *valp = ctl->data; + __uint32_t vn_active; + + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + + if (!ret && write && *valp) { + xfs_notice(NULL, "Clearing xfsstats"); + for_each_possible_cpu(c) { + preempt_disable(); + /* save vn_active, it's a universal truth! */ + vn_active = per_cpu(xfsstats, c).vn_active; + memset(&per_cpu(xfsstats, c), 0, + sizeof(struct xfsstats)); + per_cpu(xfsstats, c).vn_active = vn_active; + preempt_enable(); + } + xfs_stats_clear = 0; + } + + return ret; +} + +STATIC int +xfs_panic_mask_proc_handler( + ctl_table *ctl, + int write, + void __user *buffer, + size_t *lenp, + loff_t *ppos) +{ + int ret, *valp = ctl->data; + + ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); + if (!ret && write) { + xfs_panic_mask = *valp; +#ifdef DEBUG + xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES); +#endif + } + return ret; +} +#endif /* CONFIG_PROC_FS */ + +static ctl_table xfs_table[] = { + { + .procname = "irix_sgid_inherit", + .data = &xfs_params.sgid_inherit.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.sgid_inherit.min, + .extra2 = &xfs_params.sgid_inherit.max + }, + { + .procname = "irix_symlink_mode", + .data = &xfs_params.symlink_mode.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.symlink_mode.min, + .extra2 = &xfs_params.symlink_mode.max + }, + { + .procname = "panic_mask", + .data = &xfs_params.panic_mask.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = xfs_panic_mask_proc_handler, + .extra1 = &xfs_params.panic_mask.min, + .extra2 = &xfs_params.panic_mask.max + }, + + { + .procname = "error_level", + .data = &xfs_params.error_level.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.error_level.min, + .extra2 = &xfs_params.error_level.max + }, + { + .procname = "xfssyncd_centisecs", + .data = &xfs_params.syncd_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.syncd_timer.min, + .extra2 = &xfs_params.syncd_timer.max + }, + { + .procname = "inherit_sync", + .data = &xfs_params.inherit_sync.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_sync.min, + .extra2 = &xfs_params.inherit_sync.max + }, + { + .procname = "inherit_nodump", + .data = &xfs_params.inherit_nodump.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nodump.min, + .extra2 = &xfs_params.inherit_nodump.max + }, + { + .procname = "inherit_noatime", + .data = &xfs_params.inherit_noatim.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_noatim.min, + .extra2 = &xfs_params.inherit_noatim.max + }, + { + .procname = "xfsbufd_centisecs", + .data = &xfs_params.xfs_buf_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.xfs_buf_timer.min, + .extra2 = &xfs_params.xfs_buf_timer.max + }, + { + .procname = "age_buffer_centisecs", + .data = &xfs_params.xfs_buf_age.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.xfs_buf_age.min, + .extra2 = &xfs_params.xfs_buf_age.max + }, + { + .procname = "inherit_nosymlinks", + .data = &xfs_params.inherit_nosym.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nosym.min, + .extra2 = &xfs_params.inherit_nosym.max + }, + { + .procname = "rotorstep", + .data = &xfs_params.rotorstep.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.rotorstep.min, + .extra2 = &xfs_params.rotorstep.max + }, + { + .procname = "inherit_nodefrag", + .data = &xfs_params.inherit_nodfrg.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.inherit_nodfrg.min, + .extra2 = &xfs_params.inherit_nodfrg.max + }, + { + .procname = "filestream_centisecs", + .data = &xfs_params.fstrm_timer.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &xfs_params.fstrm_timer.min, + .extra2 = &xfs_params.fstrm_timer.max, + }, + /* please keep this the last entry */ +#ifdef CONFIG_PROC_FS + { + .procname = "stats_clear", + .data = &xfs_params.stats_clear.val, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = xfs_stats_clear_proc_handler, + .extra1 = &xfs_params.stats_clear.min, + .extra2 = &xfs_params.stats_clear.max + }, +#endif /* CONFIG_PROC_FS */ + + {} +}; + +static ctl_table xfs_dir_table[] = { + { + .procname = "xfs", + .mode = 0555, + .child = xfs_table + }, + {} +}; + +static ctl_table xfs_root_table[] = { + { + .procname = "fs", + .mode = 0555, + .child = xfs_dir_table + }, + {} +}; + +int +xfs_sysctl_register(void) +{ + xfs_table_header = register_sysctl_table(xfs_root_table); + if (!xfs_table_header) + return -ENOMEM; + return 0; +} + +void +xfs_sysctl_unregister(void) +{ + unregister_sysctl_table(xfs_table_header); +} diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h new file mode 100644 index 000000000000..b9937d450f8e --- /dev/null +++ b/fs/xfs/xfs_sysctl.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2001-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_SYSCTL_H__ +#define __XFS_SYSCTL_H__ + +#include + +/* + * Tunable xfs parameters + */ + +typedef struct xfs_sysctl_val { + int min; + int val; + int max; +} xfs_sysctl_val_t; + +typedef struct xfs_param { + xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is + * not a member of parent dir GID. */ + xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ + xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */ + xfs_sysctl_val_t error_level; /* Degree of reporting for problems */ + xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */ + xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */ + xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */ + xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */ + xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */ + xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */ + xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */ + xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */ + xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */ + xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */ + xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */ +} xfs_param_t; + +/* + * xfs_error_level: + * + * How much error reporting will be done when internal problems are + * encountered. These problems normally return an EFSCORRUPTED to their + * caller, with no other information reported. + * + * 0 No error reports + * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown + * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any + * additional errors that are known to not cause shutdowns) + * + * xfs_panic_mask bit 0x8 turns the error reports into panics + */ + +enum { + /* XFS_REFCACHE_SIZE = 1 */ + /* XFS_REFCACHE_PURGE = 2 */ + /* XFS_RESTRICT_CHOWN = 3 */ + XFS_SGID_INHERIT = 4, + XFS_SYMLINK_MODE = 5, + XFS_PANIC_MASK = 6, + XFS_ERRLEVEL = 7, + XFS_SYNCD_TIMER = 8, + /* XFS_PROBE_DMAPI = 9 */ + /* XFS_PROBE_IOOPS = 10 */ + /* XFS_PROBE_QUOTA = 11 */ + XFS_STATS_CLEAR = 12, + XFS_INHERIT_SYNC = 13, + XFS_INHERIT_NODUMP = 14, + XFS_INHERIT_NOATIME = 15, + XFS_BUF_TIMER = 16, + XFS_BUF_AGE = 17, + /* XFS_IO_BYPASS = 18 */ + XFS_INHERIT_NOSYM = 19, + XFS_ROTORSTEP = 20, + XFS_INHERIT_NODFRG = 21, + XFS_FILESTREAM_TIMER = 22, +}; + +extern xfs_param_t xfs_params; + +#ifdef CONFIG_SYSCTL +extern int xfs_sysctl_register(void); +extern void xfs_sysctl_unregister(void); +#else +# define xfs_sysctl_register() (0) +# define xfs_sysctl_unregister() do { } while (0) +#endif /* CONFIG_SYSCTL */ + +#endif /* __XFS_SYSCTL_H__ */ diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c new file mode 100644 index 000000000000..9010ce885e6a --- /dev/null +++ b/fs/xfs/xfs_trace.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_dinode.h" +#include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_mount.h" +#include "xfs_ialloc.h" +#include "xfs_itable.h" +#include "xfs_alloc.h" +#include "xfs_bmap.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_log_priv.h" +#include "xfs_buf_item.h" +#include "xfs_quota.h" +#include "xfs_iomap.h" +#include "xfs_aops.h" +#include "xfs_dquot_item.h" +#include "xfs_dquot.h" +#include "xfs_log_recover.h" +#include "xfs_inode_item.h" + +/* + * We include this last to have the helpers above available for the trace + * event implementations. + */ +#define CREATE_TRACE_POINTS +#include "xfs_trace.h" diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h new file mode 100644 index 000000000000..690fc7a7bd72 --- /dev/null +++ b/fs/xfs/xfs_trace.h @@ -0,0 +1,1746 @@ +/* + * Copyright (c) 2009, Christoph Hellwig + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM xfs + +#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_XFS_H + +#include + +struct xfs_agf; +struct xfs_alloc_arg; +struct xfs_attr_list_context; +struct xfs_buf_log_item; +struct xfs_da_args; +struct xfs_da_node_entry; +struct xfs_dquot; +struct xlog_ticket; +struct log; +struct xlog_recover; +struct xlog_recover_item; +struct xfs_buf_log_format; +struct xfs_inode_log_format; + +DECLARE_EVENT_CLASS(xfs_attr_list_class, + TP_PROTO(struct xfs_attr_list_context *ctx), + TP_ARGS(ctx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) + ) +) + +#define DEFINE_ATTR_LIST_EVENT(name) \ +DEFINE_EVENT(xfs_attr_list_class, name, \ + TP_PROTO(struct xfs_attr_list_context *ctx), \ + TP_ARGS(ctx)) +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk); +DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); + +DECLARE_EVENT_CLASS(xfs_perag_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, + unsigned long caller_ip), + TP_ARGS(mp, agno, refcount, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, refcount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->refcount = refcount; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u refcount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->refcount, + (char *)__entry->caller_ip) +); + +#define DEFINE_PERAG_REF_EVENT(name) \ +DEFINE_EVENT(xfs_perag_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, refcount, caller_ip)) +DEFINE_PERAG_REF_EVENT(xfs_perag_get); +DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_put); +DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); +DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); + +TRACE_EVENT(xfs_attr_list_node_descend, + TP_PROTO(struct xfs_attr_list_context *ctx, + struct xfs_da_node_entry *btree), + TP_ARGS(ctx, btree), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(u32, hashval) + __field(u32, blkno) + __field(u32, offset) + __field(void *, alist) + __field(int, bufsize) + __field(int, count) + __field(int, firstu) + __field(int, dupcnt) + __field(int, flags) + __field(u32, bt_hashval) + __field(u32, bt_before) + ), + TP_fast_assign( + __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; + __entry->ino = ctx->dp->i_ino; + __entry->hashval = ctx->cursor->hashval; + __entry->blkno = ctx->cursor->blkno; + __entry->offset = ctx->cursor->offset; + __entry->alist = ctx->alist; + __entry->bufsize = ctx->bufsize; + __entry->count = ctx->count; + __entry->firstu = ctx->firstu; + __entry->flags = ctx->flags; + __entry->bt_hashval = be32_to_cpu(btree->hashval); + __entry->bt_before = be32_to_cpu(btree->before); + ), + TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " + "alist 0x%p size %u count %u firstu %u flags %d %s " + "node hashval %u, node before %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->hashval, + __entry->blkno, + __entry->offset, + __entry->dupcnt, + __entry->alist, + __entry->bufsize, + __entry->count, + __entry->firstu, + __entry->flags, + __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS), + __entry->bt_hashval, + __entry->bt_before) +); + +TRACE_EVENT(xfs_iext_insert, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, + struct xfs_bmbt_irec *r, int state, unsigned long caller_ip), + TP_ARGS(ip, idx, r, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r->br_startoff; + __entry->startblock = r->br_startblock; + __entry->blockcount = r->br_blockcount; + __entry->state = r->br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %lld count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_bmap_class, + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, + unsigned long caller_ip), + TP_ARGS(ip, idx, state, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_extnum_t, idx) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + __field(xfs_exntst_t, state) + __field(int, bmap_state) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? + ip->i_afp : &ip->i_df; + struct xfs_bmbt_irec r; + + xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->idx = idx; + __entry->startoff = r.br_startoff; + __entry->startblock = r.br_startblock; + __entry->blockcount = r.br_blockcount; + __entry->state = r.br_state; + __entry->bmap_state = state; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " + "offset %lld block %lld count %lld flag %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), + (long)__entry->idx, + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount, + __entry->state, + (char *)__entry->caller_ip) +) + +#define DEFINE_BMAP_EVENT(name) \ +DEFINE_EVENT(xfs_bmap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \ + unsigned long caller_ip), \ + TP_ARGS(ip, idx, state, caller_ip)) +DEFINE_BMAP_EVENT(xfs_iext_remove); +DEFINE_BMAP_EVENT(xfs_bmap_pre_update); +DEFINE_BMAP_EVENT(xfs_bmap_post_update); +DEFINE_BMAP_EVENT(xfs_extlist); + +DECLARE_EVENT_CLASS(xfs_buf_class, + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), + TP_ARGS(bp, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_EVENT(name) \ +DEFINE_EVENT(xfs_buf_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \ + TP_ARGS(bp, caller_ip)) +DEFINE_BUF_EVENT(xfs_buf_init); +DEFINE_BUF_EVENT(xfs_buf_free); +DEFINE_BUF_EVENT(xfs_buf_hold); +DEFINE_BUF_EVENT(xfs_buf_rele); +DEFINE_BUF_EVENT(xfs_buf_iodone); +DEFINE_BUF_EVENT(xfs_buf_iorequest); +DEFINE_BUF_EVENT(xfs_buf_bawrite); +DEFINE_BUF_EVENT(xfs_buf_bdwrite); +DEFINE_BUF_EVENT(xfs_buf_lock); +DEFINE_BUF_EVENT(xfs_buf_lock_done); +DEFINE_BUF_EVENT(xfs_buf_trylock); +DEFINE_BUF_EVENT(xfs_buf_unlock); +DEFINE_BUF_EVENT(xfs_buf_iowait); +DEFINE_BUF_EVENT(xfs_buf_iowait_done); +DEFINE_BUF_EVENT(xfs_buf_delwri_queue); +DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); +DEFINE_BUF_EVENT(xfs_buf_delwri_split); +DEFINE_BUF_EVENT(xfs_buf_get_uncached); +DEFINE_BUF_EVENT(xfs_bdstrat_shut); +DEFINE_BUF_EVENT(xfs_buf_item_relse); +DEFINE_BUF_EVENT(xfs_buf_item_iodone); +DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); +DEFINE_BUF_EVENT(xfs_buf_error_relse); +DEFINE_BUF_EVENT(xfs_trans_read_buf_io); +DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); + +/* not really buffer traces, but the buf provides useful information */ +DEFINE_BUF_EVENT(xfs_btree_corrupt); +DEFINE_BUF_EVENT(xfs_da_btree_corrupt); +DEFINE_BUF_EVENT(xfs_reset_dqcounts); +DEFINE_BUF_EVENT(xfs_inode_item_push); + +/* pass flags explicitly */ +DECLARE_EVENT_CLASS(xfs_buf_flags_class, + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), + TP_ARGS(bp, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(unsigned, flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->flags = flags; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_BUF_FLAGS_EVENT(name) \ +DEFINE_EVENT(xfs_buf_flags_class, name, \ + TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \ + TP_ARGS(bp, flags, caller_ip)) +DEFINE_BUF_FLAGS_EVENT(xfs_buf_find); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_get); +DEFINE_BUF_FLAGS_EVENT(xfs_buf_read); + +TRACE_EVENT(xfs_buf_ioerror, + TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip), + TP_ARGS(bp, error, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, bno) + __field(size_t, buffer_length) + __field(unsigned, flags) + __field(int, hold) + __field(int, pincount) + __field(unsigned, lockval) + __field(int, error) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = bp->b_target->bt_dev; + __entry->bno = bp->b_bn; + __entry->buffer_length = bp->b_buffer_length; + __entry->hold = atomic_read(&bp->b_hold); + __entry->pincount = atomic_read(&bp->b_pin_count); + __entry->lockval = bp->b_sema.count; + __entry->error = error; + __entry->flags = bp->b_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d error %d flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->bno, + __entry->buffer_length, + __entry->hold, + __entry->pincount, + __entry->lockval, + __entry->error, + __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), + (void *)__entry->caller_ip) +); + +DECLARE_EVENT_CLASS(xfs_buf_item_class, + TP_PROTO(struct xfs_buf_log_item *bip), + TP_ARGS(bip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_daddr_t, buf_bno) + __field(size_t, buf_len) + __field(int, buf_hold) + __field(int, buf_pincount) + __field(int, buf_lockval) + __field(unsigned, buf_flags) + __field(unsigned, bli_recur) + __field(int, bli_refcount) + __field(unsigned, bli_flags) + __field(void *, li_desc) + __field(unsigned, li_flags) + ), + TP_fast_assign( + __entry->dev = bip->bli_buf->b_target->bt_dev; + __entry->bli_flags = bip->bli_flags; + __entry->bli_recur = bip->bli_recur; + __entry->bli_refcount = atomic_read(&bip->bli_refcount); + __entry->buf_bno = bip->bli_buf->b_bn; + __entry->buf_len = bip->bli_buf->b_buffer_length; + __entry->buf_flags = bip->bli_buf->b_flags; + __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); + __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); + __entry->buf_lockval = bip->bli_buf->b_sema.count; + __entry->li_desc = bip->bli_item.li_desc; + __entry->li_flags = bip->bli_item.li_flags; + ), + TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " + "lock %d flags %s recur %d refcount %d bliflags %s " + "lidesc 0x%p liflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long long)__entry->buf_bno, + __entry->buf_len, + __entry->buf_hold, + __entry->buf_pincount, + __entry->buf_lockval, + __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), + __entry->bli_recur, + __entry->bli_refcount, + __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), + __entry->li_desc, + __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_BUF_ITEM_EVENT(name) \ +DEFINE_EVENT(xfs_buf_item_class, name, \ + TP_PROTO(struct xfs_buf_log_item *bip), \ + TP_ARGS(bip)) +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); +DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb); +DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur); +DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf); +DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold); +DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); +DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); + +DECLARE_EVENT_CLASS(xfs_lock_class, + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, + unsigned long caller_ip), + TP_ARGS(ip, lock_flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, lock_flags) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->lock_flags = lock_flags; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), + (void *)__entry->caller_ip) +) + +#define DEFINE_LOCK_EVENT(name) \ +DEFINE_EVENT(xfs_lock_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \ + unsigned long caller_ip), \ + TP_ARGS(ip, lock_flags, caller_ip)) +DEFINE_LOCK_EVENT(xfs_ilock); +DEFINE_LOCK_EVENT(xfs_ilock_nowait); +DEFINE_LOCK_EVENT(xfs_ilock_demote); +DEFINE_LOCK_EVENT(xfs_iunlock); + +DECLARE_EVENT_CLASS(xfs_inode_class, + TP_PROTO(struct xfs_inode *ip), + TP_ARGS(ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino) +) + +#define DEFINE_INODE_EVENT(name) \ +DEFINE_EVENT(xfs_inode_class, name, \ + TP_PROTO(struct xfs_inode *ip), \ + TP_ARGS(ip)) +DEFINE_INODE_EVENT(xfs_iget_skip); +DEFINE_INODE_EVENT(xfs_iget_reclaim); +DEFINE_INODE_EVENT(xfs_iget_reclaim_fail); +DEFINE_INODE_EVENT(xfs_iget_hit); +DEFINE_INODE_EVENT(xfs_iget_miss); + +DEFINE_INODE_EVENT(xfs_getattr); +DEFINE_INODE_EVENT(xfs_setattr); +DEFINE_INODE_EVENT(xfs_readlink); +DEFINE_INODE_EVENT(xfs_alloc_file_space); +DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_readdir); +#ifdef CONFIG_XFS_POSIX_ACL +DEFINE_INODE_EVENT(xfs_get_acl); +#endif +DEFINE_INODE_EVENT(xfs_vm_bmap); +DEFINE_INODE_EVENT(xfs_file_ioctl); +DEFINE_INODE_EVENT(xfs_file_compat_ioctl); +DEFINE_INODE_EVENT(xfs_ioctl_setattr); +DEFINE_INODE_EVENT(xfs_file_fsync); +DEFINE_INODE_EVENT(xfs_destroy_inode); +DEFINE_INODE_EVENT(xfs_write_inode); +DEFINE_INODE_EVENT(xfs_evict_inode); + +DEFINE_INODE_EVENT(xfs_dquot_dqalloc); +DEFINE_INODE_EVENT(xfs_dquot_dqdetach); + +DECLARE_EVENT_CLASS(xfs_iref_class, + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), + TP_ARGS(ip, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, count) + __field(int, pincount) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->count = atomic_read(&VFS_I(ip)->i_count); + __entry->pincount = atomic_read(&ip->i_pincount); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->count, + __entry->pincount, + (char *)__entry->caller_ip) +) + +#define DEFINE_IREF_EVENT(name) \ +DEFINE_EVENT(xfs_iref_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \ + TP_ARGS(ip, caller_ip)) +DEFINE_IREF_EVENT(xfs_ihold); +DEFINE_IREF_EVENT(xfs_irele); +DEFINE_IREF_EVENT(xfs_inode_pin); +DEFINE_IREF_EVENT(xfs_inode_unpin); +DEFINE_IREF_EVENT(xfs_inode_unpin_nowait); + +DECLARE_EVENT_CLASS(xfs_namespace_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), + TP_ARGS(dp, name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dp ino 0x%llx name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(name)) +) + +#define DEFINE_NAMESPACE_EVENT(name) \ +DEFINE_EVENT(xfs_namespace_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \ + TP_ARGS(dp, name)) +DEFINE_NAMESPACE_EVENT(xfs_remove); +DEFINE_NAMESPACE_EVENT(xfs_link); +DEFINE_NAMESPACE_EVENT(xfs_lookup); +DEFINE_NAMESPACE_EVENT(xfs_create); +DEFINE_NAMESPACE_EVENT(xfs_symlink); + +TRACE_EVENT(xfs_rename, + TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp, + struct xfs_name *src_name, struct xfs_name *target_name), + TP_ARGS(src_dp, target_dp, src_name, target_name), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, src_dp_ino) + __field(xfs_ino_t, target_dp_ino) + __dynamic_array(char, src_name, src_name->len) + __dynamic_array(char, target_name, target_name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(src_dp)->i_sb->s_dev; + __entry->src_dp_ino = src_dp->i_ino; + __entry->target_dp_ino = target_dp->i_ino; + memcpy(__get_str(src_name), src_name->name, src_name->len); + memcpy(__get_str(target_name), target_name->name, target_name->len); + ), + TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx" + " src name %s target name %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->src_dp_ino, + __entry->target_dp_ino, + __get_str(src_name), + __get_str(target_name)) +) + +DECLARE_EVENT_CLASS(xfs_dquot_class, + TP_PROTO(struct xfs_dquot *dqp), + TP_ARGS(dqp), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(u32, id) + __field(unsigned, flags) + __field(unsigned, nrefs) + __field(unsigned long long, res_bcount) + __field(unsigned long long, bcount) + __field(unsigned long long, icount) + __field(unsigned long long, blk_hardlimit) + __field(unsigned long long, blk_softlimit) + __field(unsigned long long, ino_hardlimit) + __field(unsigned long long, ino_softlimit) + ), \ + TP_fast_assign( + __entry->dev = dqp->q_mount->m_super->s_dev; + __entry->id = be32_to_cpu(dqp->q_core.d_id); + __entry->flags = dqp->dq_flags; + __entry->nrefs = dqp->q_nrefs; + __entry->res_bcount = dqp->q_res_bcount; + __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); + __entry->icount = be64_to_cpu(dqp->q_core.d_icount); + __entry->blk_hardlimit = + be64_to_cpu(dqp->q_core.d_blk_hardlimit); + __entry->blk_softlimit = + be64_to_cpu(dqp->q_core.d_blk_softlimit); + __entry->ino_hardlimit = + be64_to_cpu(dqp->q_core.d_ino_hardlimit); + __entry->ino_softlimit = + be64_to_cpu(dqp->q_core.d_ino_softlimit); + ), + TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " + "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx " + "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->id, + __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), + __entry->nrefs, + __entry->res_bcount, + __entry->bcount, + __entry->blk_hardlimit, + __entry->blk_softlimit, + __entry->icount, + __entry->ino_hardlimit, + __entry->ino_softlimit) +) + +#define DEFINE_DQUOT_EVENT(name) \ +DEFINE_EVENT(xfs_dquot_class, name, \ + TP_PROTO(struct xfs_dquot *dqp), \ + TP_ARGS(dqp)) +DEFINE_DQUOT_EVENT(xfs_dqadjust); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_want); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty); +DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink); +DEFINE_DQUOT_EVENT(xfs_dqattach_found); +DEFINE_DQUOT_EVENT(xfs_dqattach_get); +DEFINE_DQUOT_EVENT(xfs_dqinit); +DEFINE_DQUOT_EVENT(xfs_dqreuse); +DEFINE_DQUOT_EVENT(xfs_dqalloc); +DEFINE_DQUOT_EVENT(xfs_dqtobp_read); +DEFINE_DQUOT_EVENT(xfs_dqread); +DEFINE_DQUOT_EVENT(xfs_dqread_fail); +DEFINE_DQUOT_EVENT(xfs_dqlookup_found); +DEFINE_DQUOT_EVENT(xfs_dqlookup_want); +DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist); +DEFINE_DQUOT_EVENT(xfs_dqlookup_done); +DEFINE_DQUOT_EVENT(xfs_dqget_hit); +DEFINE_DQUOT_EVENT(xfs_dqget_miss); +DEFINE_DQUOT_EVENT(xfs_dqput); +DEFINE_DQUOT_EVENT(xfs_dqput_wait); +DEFINE_DQUOT_EVENT(xfs_dqput_free); +DEFINE_DQUOT_EVENT(xfs_dqrele); +DEFINE_DQUOT_EVENT(xfs_dqflush); +DEFINE_DQUOT_EVENT(xfs_dqflush_force); +DEFINE_DQUOT_EVENT(xfs_dqflush_done); + +DECLARE_EVENT_CLASS(xfs_loggrant_class, + TP_PROTO(struct log *log, struct xlog_ticket *tic), + TP_ARGS(log, tic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned, trans_type) + __field(char, ocnt) + __field(char, cnt) + __field(int, curr_res) + __field(int, unit_res) + __field(unsigned int, flags) + __field(int, reserveq) + __field(int, writeq) + __field(int, grant_reserve_cycle) + __field(int, grant_reserve_bytes) + __field(int, grant_write_cycle) + __field(int, grant_write_bytes) + __field(int, curr_cycle) + __field(int, curr_block) + __field(xfs_lsn_t, tail_lsn) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->trans_type = tic->t_trans_type; + __entry->ocnt = tic->t_ocnt; + __entry->cnt = tic->t_cnt; + __entry->curr_res = tic->t_curr_res; + __entry->unit_res = tic->t_unit_res; + __entry->flags = tic->t_flags; + __entry->reserveq = list_empty(&log->l_reserveq); + __entry->writeq = list_empty(&log->l_writeq); + xlog_crack_grant_head(&log->l_grant_reserve_head, + &__entry->grant_reserve_cycle, + &__entry->grant_reserve_bytes); + xlog_crack_grant_head(&log->l_grant_write_head, + &__entry->grant_write_cycle, + &__entry->grant_write_bytes); + __entry->curr_cycle = log->l_curr_cycle; + __entry->curr_block = log->l_curr_block; + __entry->tail_lsn = atomic64_read(&log->l_tail_lsn); + ), + TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " + "t_unit_res %u t_flags %s reserveq %s " + "writeq %s grant_reserve_cycle %d " + "grant_reserve_bytes %d grant_write_cycle %d " + "grant_write_bytes %d curr_cycle %d curr_block %d " + "tail_cycle %d tail_block %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), + __entry->ocnt, + __entry->cnt, + __entry->curr_res, + __entry->unit_res, + __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), + __entry->reserveq ? "empty" : "active", + __entry->writeq ? "empty" : "active", + __entry->grant_reserve_cycle, + __entry->grant_reserve_bytes, + __entry->grant_write_cycle, + __entry->grant_write_bytes, + __entry->curr_cycle, + __entry->curr_block, + CYCLE_LSN(__entry->tail_lsn), + BLOCK_LSN(__entry->tail_lsn) + ) +) + +#define DEFINE_LOGGRANT_EVENT(name) \ +DEFINE_EVENT(xfs_loggrant_class, name, \ + TP_PROTO(struct log *log, struct xlog_ticket *tic), \ + TP_ARGS(log, tic)) +DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm); +DEFINE_LOGGRANT_EVENT(xfs_log_done_perm); +DEFINE_LOGGRANT_EVENT(xfs_log_reserve); +DEFINE_LOGGRANT_EVENT(xfs_log_umount_write); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_error); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit); +DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub); + +DECLARE_EVENT_CLASS(xfs_file_class, + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), + TP_ARGS(ip, count, offset, flags), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count 0x%zx ioflags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_flags(__entry->flags, "|", XFS_IO_FLAGS)) +) + +#define DEFINE_RW_EVENT(name) \ +DEFINE_EVENT(xfs_file_class, name, \ + TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \ + TP_ARGS(ip, count, offset, flags)) +DEFINE_RW_EVENT(xfs_file_read); +DEFINE_RW_EVENT(xfs_file_buffered_write); +DEFINE_RW_EVENT(xfs_file_direct_write); +DEFINE_RW_EVENT(xfs_file_splice_read); +DEFINE_RW_EVENT(xfs_file_splice_write); + +DECLARE_EVENT_CLASS(xfs_page_class, + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), + TP_ARGS(inode, page, off), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(pgoff_t, pgoff) + __field(loff_t, size) + __field(unsigned long, offset) + __field(int, delalloc) + __field(int, unwritten) + ), + TP_fast_assign( + int delalloc = -1, unwritten = -1; + + if (page_has_buffers(page)) + xfs_count_page_state(page, &delalloc, &unwritten); + __entry->dev = inode->i_sb->s_dev; + __entry->ino = XFS_I(inode)->i_ino; + __entry->pgoff = page_offset(page); + __entry->size = i_size_read(inode); + __entry->offset = off; + __entry->delalloc = delalloc; + __entry->unwritten = unwritten; + ), + TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " + "delalloc %d unwritten %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->pgoff, + __entry->size, + __entry->offset, + __entry->delalloc, + __entry->unwritten) +) + +#define DEFINE_PAGE_EVENT(name) \ +DEFINE_EVENT(xfs_page_class, name, \ + TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ + TP_ARGS(inode, page, off)) +DEFINE_PAGE_EVENT(xfs_writepage); +DEFINE_PAGE_EVENT(xfs_releasepage); +DEFINE_PAGE_EVENT(xfs_invalidatepage); + +DECLARE_EVENT_CLASS(xfs_imap_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, + int type, struct xfs_bmbt_irec *irec), + TP_ARGS(ip, offset, count, type, irec), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, size) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + __field(int, type) + __field(xfs_fileoff_t, startoff) + __field(xfs_fsblock_t, startblock) + __field(xfs_filblks_t, blockcount) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + __entry->type = type; + __entry->startoff = irec ? irec->br_startoff : 0; + __entry->startblock = irec ? irec->br_startblock : 0; + __entry->blockcount = irec ? irec->br_blockcount : 0; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd type %s " + "startoff 0x%llx startblock %lld blockcount 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size, + __entry->offset, + __entry->count, + __print_symbolic(__entry->type, XFS_IO_TYPES), + __entry->startoff, + (__int64_t)__entry->startblock, + __entry->blockcount) +) + +#define DEFINE_IOMAP_EVENT(name) \ +DEFINE_EVENT(xfs_imap_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \ + int type, struct xfs_bmbt_irec *irec), \ + TP_ARGS(ip, offset, count, type, irec)) +DEFINE_IOMAP_EVENT(xfs_map_blocks_found); +DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); +DEFINE_IOMAP_EVENT(xfs_get_blocks_found); +DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); + +DECLARE_EVENT_CLASS(xfs_simple_io_class, + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), + TP_ARGS(ip, offset, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(loff_t, isize) + __field(loff_t, disize) + __field(loff_t, new_size) + __field(loff_t, offset) + __field(size_t, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->isize = ip->i_size; + __entry->disize = ip->i_d.di_size; + __entry->new_size = ip->i_new_size; + __entry->offset = offset; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx " + "offset 0x%llx count %zd", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->isize, + __entry->disize, + __entry->new_size, + __entry->offset, + __entry->count) +); + +#define DEFINE_SIMPLE_IO_EVENT(name) \ +DEFINE_EVENT(xfs_simple_io_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \ + TP_ARGS(ip, offset, count)) +DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); +DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); +DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); +DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); + +DECLARE_EVENT_CLASS(xfs_itrunc_class, + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), + TP_ARGS(ip, new_size), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fsize_t, new_size) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->new_size = new_size; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->new_size) +) + +#define DEFINE_ITRUNC_EVENT(name) \ +DEFINE_EVENT(xfs_itrunc_class, name, \ + TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ + TP_ARGS(ip, new_size)) +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start); +DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end); + +TRACE_EVENT(xfs_pagecache_inval, + TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), + TP_ARGS(ip, start, finish), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_off_t, start) + __field(xfs_off_t, finish) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->start = start; + __entry->finish = finish; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->start, + __entry->finish) +); + +TRACE_EVENT(xfs_bunmap, + TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, + int flags, unsigned long caller_ip), + TP_ARGS(ip, bno, len, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_fsize_t, size) + __field(xfs_fileoff_t, bno) + __field(xfs_filblks_t, len) + __field(unsigned long, caller_ip) + __field(int, flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->ino = ip->i_ino; + __entry->size = ip->i_d.di_size; + __entry->bno = bno; + __entry->len = len; + __entry->caller_ip = caller_ip; + __entry->flags = flags; + ), + TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" + "flags %s caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->bno, + __entry->len, + __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS), + (void *)__entry->caller_ip) + +); + +DECLARE_EVENT_CLASS(xfs_busy_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len) +); +#define DEFINE_BUSY_EVENT(name) \ +DEFINE_EVENT(xfs_busy_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_BUSY_EVENT(xfs_alloc_busy); +DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); +DEFINE_BUSY_EVENT(xfs_alloc_busy_force); +DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); +DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); + +TRACE_EVENT(xfs_alloc_busy_trim, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, + xfs_agblock_t tbno, xfs_extlen_t tlen), + TP_ARGS(mp, agno, agbno, len, tbno, tlen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(xfs_agblock_t, tbno) + __field(xfs_extlen_t, tlen) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->tbno = tbno; + __entry->tlen = tlen; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->tbno, + __entry->tlen) +); + +TRACE_EVENT(xfs_trans_commit_lsn, + TP_PROTO(struct xfs_trans *trans), + TP_ARGS(trans), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(struct xfs_trans *, tp) + __field(xfs_lsn_t, lsn) + ), + TP_fast_assign( + __entry->dev = trans->t_mountp->m_super->s_dev; + __entry->tp = trans; + __entry->lsn = trans->t_commit_lsn; + ), + TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tp, + __entry->lsn) +); + +TRACE_EVENT(xfs_agf, + TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags, + unsigned long caller_ip), + TP_ARGS(mp, agf, flags, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, flags) + __field(__u32, length) + __field(__u32, bno_root) + __field(__u32, cnt_root) + __field(__u32, bno_level) + __field(__u32, cnt_level) + __field(__u32, flfirst) + __field(__u32, fllast) + __field(__u32, flcount) + __field(__u32, freeblks) + __field(__u32, longest) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = be32_to_cpu(agf->agf_seqno), + __entry->flags = flags; + __entry->length = be32_to_cpu(agf->agf_length), + __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), + __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), + __entry->bno_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), + __entry->cnt_level = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), + __entry->flfirst = be32_to_cpu(agf->agf_flfirst), + __entry->fllast = be32_to_cpu(agf->agf_fllast), + __entry->flcount = be32_to_cpu(agf->agf_flcount), + __entry->freeblks = be32_to_cpu(agf->agf_freeblks), + __entry->longest = be32_to_cpu(agf->agf_longest); + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " + "levels b %u c %u flfirst %u fllast %u flcount %u " + "freeblks %u longest %u caller %pf", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), + __entry->length, + __entry->bno_root, + __entry->cnt_root, + __entry->bno_level, + __entry->cnt_level, + __entry->flfirst, + __entry->fllast, + __entry->flcount, + __entry->freeblks, + __entry->longest, + (void *)__entry->caller_ip) +); + +TRACE_EVENT(xfs_free_extent, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno, + xfs_extlen_t len, bool isfl, int haveleft, int haveright), + TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(int, isfl) + __field(int, haveleft) + __field(int, haveright) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->isfl = isfl; + __entry->haveleft = haveleft; + __entry->haveright = haveright; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->isfl, + __entry->haveleft ? + (__entry->haveright ? "both" : "left") : + (__entry->haveright ? "right" : "none")) + +); + +DECLARE_EVENT_CLASS(xfs_alloc_class, + TP_PROTO(struct xfs_alloc_arg *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, minlen) + __field(xfs_extlen_t, maxlen) + __field(xfs_extlen_t, mod) + __field(xfs_extlen_t, prod) + __field(xfs_extlen_t, minleft) + __field(xfs_extlen_t, total) + __field(xfs_extlen_t, alignment) + __field(xfs_extlen_t, minalignslop) + __field(xfs_extlen_t, len) + __field(short, type) + __field(short, otype) + __field(char, wasdel) + __field(char, wasfromfl) + __field(char, isfl) + __field(char, userdata) + __field(xfs_fsblock_t, firstblock) + ), + TP_fast_assign( + __entry->dev = args->mp->m_super->s_dev; + __entry->agno = args->agno; + __entry->agbno = args->agbno; + __entry->minlen = args->minlen; + __entry->maxlen = args->maxlen; + __entry->mod = args->mod; + __entry->prod = args->prod; + __entry->minleft = args->minleft; + __entry->total = args->total; + __entry->alignment = args->alignment; + __entry->minalignslop = args->minalignslop; + __entry->len = args->len; + __entry->type = args->type; + __entry->otype = args->otype; + __entry->wasdel = args->wasdel; + __entry->wasfromfl = args->wasfromfl; + __entry->isfl = args->isfl; + __entry->userdata = args->userdata; + __entry->firstblock = args->firstblock; + ), + TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " + "prod %u minleft %u total %u alignment %u minalignslop %u " + "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " + "userdata %d firstblock 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->minlen, + __entry->maxlen, + __entry->mod, + __entry->prod, + __entry->minleft, + __entry->total, + __entry->alignment, + __entry->minalignslop, + __entry->len, + __print_symbolic(__entry->type, XFS_ALLOC_TYPES), + __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), + __entry->wasdel, + __entry->wasfromfl, + __entry->isfl, + __entry->userdata, + (unsigned long long)__entry->firstblock) +) + +#define DEFINE_ALLOC_EVENT(name) \ +DEFINE_EVENT(xfs_alloc_class, name, \ + TP_PROTO(struct xfs_alloc_arg *args), \ + TP_ARGS(args)) +DEFINE_ALLOC_EVENT(xfs_alloc_exact_done); +DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound); +DEFINE_ALLOC_EVENT(xfs_alloc_exact_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_near_first); +DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); +DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); +DEFINE_ALLOC_EVENT(xfs_alloc_near_error); +DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); +DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); +DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); +DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); +DEFINE_ALLOC_EVENT(xfs_alloc_size_done); +DEFINE_ALLOC_EVENT(xfs_alloc_size_error); +DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); +DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); +DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); +DEFINE_ALLOC_EVENT(xfs_alloc_small_done); +DEFINE_ALLOC_EVENT(xfs_alloc_small_error); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed); + +DECLARE_EVENT_CLASS(xfs_dir2_class, + TP_PROTO(struct xfs_da_args *args), + TP_ARGS(args), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __dynamic_array(char, name, args->namelen) + __field(int, namelen) + __field(xfs_dahash_t, hashval) + __field(xfs_ino_t, inumber) + __field(int, op_flags) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + if (args->namelen) + memcpy(__get_str(name), args->name, args->namelen); + __entry->namelen = args->namelen; + __entry->hashval = args->hashval; + __entry->inumber = args->inumber; + __entry->op_flags = args->op_flags; + ), + TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " + "inumber 0x%llx op_flags %s", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->namelen, + __entry->namelen ? __get_str(name) : NULL, + __entry->namelen, + __entry->hashval, + __entry->inumber, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) +) + +#define DEFINE_DIR2_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_class, name, \ + TP_PROTO(struct xfs_da_args *args), \ + TP_ARGS(args)) +DEFINE_DIR2_EVENT(xfs_dir2_sf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_sf_create); +DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_sf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_sf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4); +DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8); +DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_block_addname); +DEFINE_DIR2_EVENT(xfs_dir2_block_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_block_replace); +DEFINE_DIR2_EVENT(xfs_dir2_block_removename); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf); +DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block); +DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node); +DEFINE_DIR2_EVENT(xfs_dir2_node_addname); +DEFINE_DIR2_EVENT(xfs_dir2_node_lookup); +DEFINE_DIR2_EVENT(xfs_dir2_node_replace); +DEFINE_DIR2_EVENT(xfs_dir2_node_removename); +DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf); + +DECLARE_EVENT_CLASS(xfs_dir2_space_class, + TP_PROTO(struct xfs_da_args *args, int idx), + TP_ARGS(args, idx), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, idx) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->idx = idx; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->idx) +) + +#define DEFINE_DIR2_SPACE_EVENT(name) \ +DEFINE_EVENT(xfs_dir2_space_class, name, \ + TP_PROTO(struct xfs_da_args *args, int idx), \ + TP_ARGS(args, idx)) +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode); +DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode); + +TRACE_EVENT(xfs_dir2_leafn_moveents, + TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count), + TP_ARGS(args, src_idx, dst_idx, count), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(int, op_flags) + __field(int, src_idx) + __field(int, dst_idx) + __field(int, count) + ), + TP_fast_assign( + __entry->dev = VFS_I(args->dp)->i_sb->s_dev; + __entry->ino = args->dp->i_ino; + __entry->op_flags = args->op_flags; + __entry->src_idx = src_idx; + __entry->dst_idx = dst_idx; + __entry->count = count; + ), + TP_printk("dev %d:%d ino 0x%llx op_flags %s " + "src_idx %d dst_idx %d count %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), + __entry->src_idx, + __entry->dst_idx, + __entry->count) +); + +#define XFS_SWAPEXT_INODES \ + { 0, "target" }, \ + { 1, "temp" } + +#define XFS_INODE_FORMAT_STR \ + { 0, "invalid" }, \ + { 1, "local" }, \ + { 2, "extent" }, \ + { 3, "btree" } + +DECLARE_EVENT_CLASS(xfs_swap_extent_class, + TP_PROTO(struct xfs_inode *ip, int which), + TP_ARGS(ip, which), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, which) + __field(xfs_ino_t, ino) + __field(int, format) + __field(int, nex) + __field(int, max_nex) + __field(int, broot_size) + __field(int, fork_off) + ), + TP_fast_assign( + __entry->dev = VFS_I(ip)->i_sb->s_dev; + __entry->which = which; + __entry->ino = ip->i_ino; + __entry->format = ip->i_d.di_format; + __entry->nex = ip->i_d.di_nextents; + __entry->max_nex = ip->i_df.if_ext_max; + __entry->broot_size = ip->i_df.if_broot_bytes; + __entry->fork_off = XFS_IFORK_BOFF(ip); + ), + TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, " + "Max in-fork extents %d, broot size %d, fork offset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->which, XFS_SWAPEXT_INODES), + __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR), + __entry->nex, + __entry->max_nex, + __entry->broot_size, + __entry->fork_off) +) + +#define DEFINE_SWAPEXT_EVENT(name) \ +DEFINE_EVENT(xfs_swap_extent_class, name, \ + TP_PROTO(struct xfs_inode *ip, int which), \ + TP_ARGS(ip, which)) + +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before); +DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after); + +DECLARE_EVENT_CLASS(xfs_log_recover_item_class, + TP_PROTO(struct log *log, struct xlog_recover *trans, + struct xlog_recover_item *item, int pass), + TP_ARGS(log, trans, item, pass), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned long, item) + __field(xlog_tid_t, tid) + __field(int, type) + __field(int, pass) + __field(int, count) + __field(int, total) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->item = (unsigned long)item; + __entry->tid = trans->r_log_tid; + __entry->type = ITEM_TYPE(item); + __entry->pass = pass; + __entry->count = item->ri_cnt; + __entry->total = item->ri_total; + ), + TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s " + "item region count/total %d/%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->tid, + __entry->pass, + (void *)__entry->item, + __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), + __entry->count, + __entry->total) +) + +#define DEFINE_LOG_RECOVER_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_item_class, name, \ + TP_PROTO(struct log *log, struct xlog_recover *trans, \ + struct xlog_recover_item *item, int pass), \ + TP_ARGS(log, trans, item, pass)) + +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail); +DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover); + +DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class, + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), + TP_ARGS(log, buf_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(__int64_t, blkno) + __field(unsigned short, len) + __field(unsigned short, flags) + __field(unsigned short, size) + __field(unsigned int, map_size) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->blkno = buf_f->blf_blkno; + __entry->len = buf_f->blf_len; + __entry->flags = buf_f->blf_flags; + __entry->size = buf_f->blf_size; + __entry->map_size = buf_f->blf_map_size; + ), + TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, " + "map_size %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->blkno, + __entry->len, + __entry->flags, + __entry->size, + __entry->map_size) +) + +#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \ + TP_ARGS(log, buf_f)) + +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf); +DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf); + +DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class, + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), + TP_ARGS(log, in_f), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned short, size) + __field(int, fields) + __field(unsigned short, asize) + __field(unsigned short, dsize) + __field(__int64_t, blkno) + __field(int, len) + __field(int, boffset) + ), + TP_fast_assign( + __entry->dev = log->l_mp->m_super->s_dev; + __entry->ino = in_f->ilf_ino; + __entry->size = in_f->ilf_size; + __entry->fields = in_f->ilf_fields; + __entry->asize = in_f->ilf_asize; + __entry->dsize = in_f->ilf_dsize; + __entry->blkno = in_f->ilf_blkno; + __entry->len = in_f->ilf_len; + __entry->boffset = in_f->ilf_boffset; + ), + TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, " + "dsize %d, blkno 0x%llx, len %d, boffset %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->size, + __entry->fields, + __entry->asize, + __entry->dsize, + __entry->blkno, + __entry->len, + __entry->boffset) +) +#define DEFINE_LOG_RECOVER_INO_ITEM(name) \ +DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \ + TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \ + TP_ARGS(log, in_f)) + +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel); +DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip); + +DECLARE_EVENT_CLASS(xfs_discard_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, agbno, len), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u\n", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len) +) + +#define DEFINE_DISCARD_EVENT(name) \ +DEFINE_EVENT(xfs_discard_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len), \ + TP_ARGS(mp, agno, agbno, len)) +DEFINE_DISCARD_EVENT(xfs_discard_extent); +DEFINE_DISCARD_EVENT(xfs_discard_toosmall); +DEFINE_DISCARD_EVENT(xfs_discard_exclude); +DEFINE_DISCARD_EVENT(xfs_discard_busy); + +#endif /* _TRACE_XFS_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE xfs_trace +#include diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c new file mode 100644 index 000000000000..4d00ee67792d --- /dev/null +++ b/fs/xfs/xfs_trans_dquot.c @@ -0,0 +1,890 @@ +/* + * Copyright (c) 2000-2002 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_alloc.h" +#include "xfs_quota.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_itable.h" +#include "xfs_bmap.h" +#include "xfs_rtalloc.h" +#include "xfs_error.h" +#include "xfs_attr.h" +#include "xfs_buf_item.h" +#include "xfs_trans_priv.h" +#include "xfs_qm.h" + +STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); + +/* + * Add the locked dquot to the transaction. + * The dquot must be locked, and it cannot be associated with any + * transaction. + */ +void +xfs_trans_dqjoin( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + ASSERT(dqp->q_transp != tp); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(dqp->q_logitem.qli_dquot == dqp); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); + + /* + * Initialize d_transp so we can later determine if this dquot is + * associated with this transaction. + */ + dqp->q_transp = tp; +} + + +/* + * This is called to mark the dquot as needing + * to be logged when the transaction is committed. The dquot must + * already be associated with the given transaction. + * Note that it marks the entire transaction as dirty. In the ordinary + * case, this gets called via xfs_trans_commit, after the transaction + * is already dirty. However, there's nothing stop this from getting + * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY + * flag. + */ +void +xfs_trans_log_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + ASSERT(dqp->q_transp == tp); + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + + tp->t_flags |= XFS_TRANS_DIRTY; + dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} + +/* + * Carry forward whatever is left of the quota blk reservation to + * the spanky new transaction + */ +void +xfs_trans_dup_dqinfo( + xfs_trans_t *otp, + xfs_trans_t *ntp) +{ + xfs_dqtrx_t *oq, *nq; + int i,j; + xfs_dqtrx_t *oqa, *nqa; + + if (!otp->t_dqinfo) + return; + + xfs_trans_alloc_dqinfo(ntp); + oqa = otp->t_dqinfo->dqa_usrdquots; + nqa = ntp->t_dqinfo->dqa_usrdquots; + + /* + * Because the quota blk reservation is carried forward, + * it is also necessary to carry forward the DQ_DIRTY flag. + */ + if(otp->t_flags & XFS_TRANS_DQ_DIRTY) + ntp->t_flags |= XFS_TRANS_DQ_DIRTY; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (oqa[i].qt_dquot == NULL) + break; + oq = &oqa[i]; + nq = &nqa[i]; + + nq->qt_dquot = oq->qt_dquot; + nq->qt_bcount_delta = nq->qt_icount_delta = 0; + nq->qt_rtbcount_delta = 0; + + /* + * Transfer whatever is left of the reservations. + */ + nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used; + oq->qt_blk_res = oq->qt_blk_res_used; + + nq->qt_rtblk_res = oq->qt_rtblk_res - + oq->qt_rtblk_res_used; + oq->qt_rtblk_res = oq->qt_rtblk_res_used; + + nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used; + oq->qt_ino_res = oq->qt_ino_res_used; + + } + oqa = otp->t_dqinfo->dqa_grpdquots; + nqa = ntp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Wrap around mod_dquot to account for both user and group quotas. + */ +void +xfs_trans_mod_dquot_byino( + xfs_trans_t *tp, + xfs_inode_t *ip, + uint field, + long delta) +{ + xfs_mount_t *mp = tp->t_mountp; + + if (!XFS_IS_QUOTA_RUNNING(mp) || + !XFS_IS_QUOTA_ON(mp) || + ip->i_ino == mp->m_sb.sb_uquotino || + ip->i_ino == mp->m_sb.sb_gquotino) + return; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) + (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); + if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot) + (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); +} + +STATIC xfs_dqtrx_t * +xfs_trans_get_dqtrx( + xfs_trans_t *tp, + xfs_dquot_t *dqp) +{ + int i; + xfs_dqtrx_t *qa; + + qa = XFS_QM_ISUDQ(dqp) ? + tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots; + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + if (qa[i].qt_dquot == NULL || + qa[i].qt_dquot == dqp) + return &qa[i]; + } + + return NULL; +} + +/* + * Make the changes in the transaction structure. + * The moral equivalent to xfs_trans_mod_sb(). + * We don't touch any fields in the dquot, so we don't care + * if it's locked or not (most of the time it won't be). + */ +void +xfs_trans_mod_dquot( + xfs_trans_t *tp, + xfs_dquot_t *dqp, + uint field, + long delta) +{ + xfs_dqtrx_t *qtrx; + + ASSERT(tp); + ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); + qtrx = NULL; + + if (tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + /* + * Find either the first free slot or the slot that belongs + * to this dquot. + */ + qtrx = xfs_trans_get_dqtrx(tp, dqp); + ASSERT(qtrx); + if (qtrx->qt_dquot == NULL) + qtrx->qt_dquot = dqp; + + switch (field) { + + /* + * regular disk blk reservation + */ + case XFS_TRANS_DQ_RES_BLKS: + qtrx->qt_blk_res += (ulong)delta; + break; + + /* + * inode reservation + */ + case XFS_TRANS_DQ_RES_INOS: + qtrx->qt_ino_res += (ulong)delta; + break; + + /* + * disk blocks used. + */ + case XFS_TRANS_DQ_BCOUNT: + if (qtrx->qt_blk_res && delta > 0) { + qtrx->qt_blk_res_used += (ulong)delta; + ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used); + } + qtrx->qt_bcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELBCOUNT: + qtrx->qt_delbcnt_delta += delta; + break; + + /* + * Inode Count + */ + case XFS_TRANS_DQ_ICOUNT: + if (qtrx->qt_ino_res && delta > 0) { + qtrx->qt_ino_res_used += (ulong)delta; + ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used); + } + qtrx->qt_icount_delta += delta; + break; + + /* + * rtblk reservation + */ + case XFS_TRANS_DQ_RES_RTBLKS: + qtrx->qt_rtblk_res += (ulong)delta; + break; + + /* + * rtblk count + */ + case XFS_TRANS_DQ_RTBCOUNT: + if (qtrx->qt_rtblk_res && delta > 0) { + qtrx->qt_rtblk_res_used += (ulong)delta; + ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used); + } + qtrx->qt_rtbcount_delta += delta; + break; + + case XFS_TRANS_DQ_DELRTBCOUNT: + qtrx->qt_delrtb_delta += delta; + break; + + default: + ASSERT(0); + } + tp->t_flags |= XFS_TRANS_DQ_DIRTY; +} + + +/* + * Given an array of dqtrx structures, lock all the dquots associated + * and join them to the transaction, provided they have been modified. + * We know that the highest number of dquots (of one type - usr OR grp), + * involved in a transaction is 2 and that both usr and grp combined - 3. + * So, we don't attempt to make this very generic. + */ +STATIC void +xfs_trans_dqlockedjoin( + xfs_trans_t *tp, + xfs_dqtrx_t *q) +{ + ASSERT(q[0].qt_dquot != NULL); + if (q[1].qt_dquot == NULL) { + xfs_dqlock(q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + } else { + ASSERT(XFS_QM_TRANS_MAXDQS == 2); + xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot); + xfs_trans_dqjoin(tp, q[0].qt_dquot); + xfs_trans_dqjoin(tp, q[1].qt_dquot); + } +} + + +/* + * Called by xfs_trans_commit() and similar in spirit to + * xfs_trans_apply_sb_deltas(). + * Go thru all the dquots belonging to this transaction and modify the + * INCORE dquot to reflect the actual usages. + * Unreserve just the reservations done by this transaction. + * dquot is still left locked at exit. + */ +void +xfs_trans_apply_dquot_deltas( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + xfs_disk_dquot_t *d; + long totalbdelta; + long totalrtbdelta; + + if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + ASSERT(tp->t_dqinfo); + qa = tp->t_dqinfo->dqa_usrdquots; + for (j = 0; j < 2; j++) { + if (qa[0].qt_dquot == NULL) { + qa = tp->t_dqinfo->dqa_grpdquots; + continue; + } + + /* + * Lock all of the dquots and join them to the transaction. + */ + xfs_trans_dqlockedjoin(tp, qa); + + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * The array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + + ASSERT(XFS_DQ_IS_LOCKED(dqp)); + ASSERT(dqp->q_transp == tp); + + /* + * adjust the actual number of blocks used + */ + d = &dqp->q_core; + + /* + * The issue here is - sometimes we don't make a blkquota + * reservation intentionally to be fair to users + * (when the amount is small). On the other hand, + * delayed allocs do make reservations, but that's + * outside of a transaction, so we have no + * idea how much was really reserved. + * So, here we've accumulated delayed allocation blks and + * non-delay blks. The assumption is that the + * delayed ones are always reserved (outside of a + * transaction), and the others may or may not have + * quota reservations. + */ + totalbdelta = qtrx->qt_bcount_delta + + qtrx->qt_delbcnt_delta; + totalrtbdelta = qtrx->qt_rtbcount_delta + + qtrx->qt_delrtb_delta; +#ifdef DEBUG + if (totalbdelta < 0) + ASSERT(be64_to_cpu(d->d_bcount) >= + -totalbdelta); + + if (totalrtbdelta < 0) + ASSERT(be64_to_cpu(d->d_rtbcount) >= + -totalrtbdelta); + + if (qtrx->qt_icount_delta < 0) + ASSERT(be64_to_cpu(d->d_icount) >= + -qtrx->qt_icount_delta); +#endif + if (totalbdelta) + be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); + + if (qtrx->qt_icount_delta) + be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta); + + if (totalrtbdelta) + be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta); + + /* + * Get any default limits in use. + * Start/reset the timer(s) if needed. + */ + if (d->d_id) { + xfs_qm_adjust_dqlimits(tp->t_mountp, d); + xfs_qm_adjust_dqtimers(tp->t_mountp, d); + } + + dqp->dq_flags |= XFS_DQ_DIRTY; + /* + * add this to the list of items to get logged + */ + xfs_trans_log_dquot(tp, dqp); + /* + * Take off what's left of the original reservation. + * In case of delayed allocations, there's no + * reservation that a transaction structure knows of. + */ + if (qtrx->qt_blk_res != 0) { + if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) { + if (qtrx->qt_blk_res > + qtrx->qt_blk_res_used) + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res - + qtrx->qt_blk_res_used); + else + dqp->q_res_bcount -= (xfs_qcnt_t) + (qtrx->qt_blk_res_used - + qtrx->qt_blk_res); + } + } else { + /* + * These blks were never reserved, either inside + * a transaction or outside one (in a delayed + * allocation). Also, this isn't always a + * negative number since we sometimes + * deliberately skip quota reservations. + */ + if (qtrx->qt_bcount_delta) { + dqp->q_res_bcount += + (xfs_qcnt_t)qtrx->qt_bcount_delta; + } + } + /* + * Adjust the RT reservation. + */ + if (qtrx->qt_rtblk_res != 0) { + if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) { + if (qtrx->qt_rtblk_res > + qtrx->qt_rtblk_res_used) + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res - + qtrx->qt_rtblk_res_used); + else + dqp->q_res_rtbcount -= (xfs_qcnt_t) + (qtrx->qt_rtblk_res_used - + qtrx->qt_rtblk_res); + } + } else { + if (qtrx->qt_rtbcount_delta) + dqp->q_res_rtbcount += + (xfs_qcnt_t)qtrx->qt_rtbcount_delta; + } + + /* + * Adjust the inode reservation. + */ + if (qtrx->qt_ino_res != 0) { + ASSERT(qtrx->qt_ino_res >= + qtrx->qt_ino_res_used); + if (qtrx->qt_ino_res > qtrx->qt_ino_res_used) + dqp->q_res_icount -= (xfs_qcnt_t) + (qtrx->qt_ino_res - + qtrx->qt_ino_res_used); + } else { + if (qtrx->qt_icount_delta) + dqp->q_res_icount += + (xfs_qcnt_t)qtrx->qt_icount_delta; + } + + ASSERT(dqp->q_res_bcount >= + be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_icount >= + be64_to_cpu(dqp->q_core.d_icount)); + ASSERT(dqp->q_res_rtbcount >= + be64_to_cpu(dqp->q_core.d_rtbcount)); + } + /* + * Do the group quotas next + */ + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +/* + * Release the reservations, and adjust the dquots accordingly. + * This is called only when the transaction is being aborted. If by + * any chance we have done dquot modifications incore (ie. deltas) already, + * we simply throw those away, since that's the expected behavior + * when a transaction is curtailed without a commit. + */ +void +xfs_trans_unreserve_and_mod_dquots( + xfs_trans_t *tp) +{ + int i, j; + xfs_dquot_t *dqp; + xfs_dqtrx_t *qtrx, *qa; + boolean_t locked; + + if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + return; + + qa = tp->t_dqinfo->dqa_usrdquots; + + for (j = 0; j < 2; j++) { + for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) { + qtrx = &qa[i]; + /* + * We assume that the array of dquots is filled + * sequentially, not sparsely. + */ + if ((dqp = qtrx->qt_dquot) == NULL) + break; + /* + * Unreserve the original reservation. We don't care + * about the number of blocks used field, or deltas. + * Also we don't bother to zero the fields. + */ + locked = B_FALSE; + if (qtrx->qt_blk_res) { + xfs_dqlock(dqp); + locked = B_TRUE; + dqp->q_res_bcount -= + (xfs_qcnt_t)qtrx->qt_blk_res; + } + if (qtrx->qt_ino_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_icount -= + (xfs_qcnt_t)qtrx->qt_ino_res; + } + + if (qtrx->qt_rtblk_res) { + if (!locked) { + xfs_dqlock(dqp); + locked = B_TRUE; + } + dqp->q_res_rtbcount -= + (xfs_qcnt_t)qtrx->qt_rtblk_res; + } + if (locked) + xfs_dqunlock(dqp); + + } + qa = tp->t_dqinfo->dqa_grpdquots; + } +} + +STATIC void +xfs_quota_warn( + struct xfs_mount *mp, + struct xfs_dquot *dqp, + int type) +{ + /* no warnings for project quotas - we just return ENOSPC later */ + if (dqp->dq_flags & XFS_DQ_PROJ) + return; + quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA, + be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev, + type); +} + +/* + * This reserves disk blocks and inodes against a dquot. + * Flags indicate if the dquot is to be locked here and also + * if the blk reservation is for RT or regular blocks. + * Sending in XFS_QMOPT_FORCE_RES flag skips the quota check. + */ +STATIC int +xfs_trans_dqresv( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *dqp, + long nblks, + long ninos, + uint flags) +{ + xfs_qcnt_t hardlimit; + xfs_qcnt_t softlimit; + time_t timer; + xfs_qwarncnt_t warns; + xfs_qwarncnt_t warnlimit; + xfs_qcnt_t count; + xfs_qcnt_t *resbcountp; + xfs_quotainfo_t *q = mp->m_quotainfo; + + + xfs_dqlock(dqp); + + if (flags & XFS_TRANS_DQ_RES_BLKS) { + hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); + if (!hardlimit) + hardlimit = q->qi_bhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); + if (!softlimit) + softlimit = q->qi_bsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_btimer); + warns = be16_to_cpu(dqp->q_core.d_bwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; + resbcountp = &dqp->q_res_bcount; + } else { + ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); + hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); + if (!hardlimit) + hardlimit = q->qi_rtbhardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); + if (!softlimit) + softlimit = q->qi_rtbsoftlimit; + timer = be32_to_cpu(dqp->q_core.d_rtbtimer); + warns = be16_to_cpu(dqp->q_core.d_rtbwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; + resbcountp = &dqp->q_res_rtbcount; + } + + if ((flags & XFS_QMOPT_FORCE_RES) == 0 && + dqp->q_core.d_id && + ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || + (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && + (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { + if (nblks > 0) { + /* + * dquot is locked already. See if we'd go over the + * hardlimit or exceed the timelimit if we allocate + * nblks. + */ + if (hardlimit > 0ULL && + hardlimit <= nblks + *resbcountp) { + xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN); + goto error_return; + } + if (softlimit > 0ULL && + softlimit <= nblks + *resbcountp) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_BSOFTLONGWARN); + goto error_return; + } + + xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN); + } + } + if (ninos > 0) { + count = be64_to_cpu(dqp->q_core.d_icount); + timer = be32_to_cpu(dqp->q_core.d_itimer); + warns = be16_to_cpu(dqp->q_core.d_iwarns); + warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; + hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); + if (!hardlimit) + hardlimit = q->qi_ihardlimit; + softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); + if (!softlimit) + softlimit = q->qi_isoftlimit; + + if (hardlimit > 0ULL && count >= hardlimit) { + xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); + goto error_return; + } + if (softlimit > 0ULL && count >= softlimit) { + if ((timer != 0 && get_seconds() > timer) || + (warns != 0 && warns >= warnlimit)) { + xfs_quota_warn(mp, dqp, + QUOTA_NL_ISOFTLONGWARN); + goto error_return; + } + xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN); + } + } + } + + /* + * Change the reservation, but not the actual usage. + * Note that q_res_bcount = q_core.d_bcount + resv + */ + (*resbcountp) += (xfs_qcnt_t)nblks; + if (ninos != 0) + dqp->q_res_icount += (xfs_qcnt_t)ninos; + + /* + * note the reservation amt in the trans struct too, + * so that the transaction knows how much was reserved by + * it against this particular dquot. + * We don't do this when we are reserving for a delayed allocation, + * because we don't have the luxury of a transaction envelope then. + */ + if (tp) { + ASSERT(tp->t_dqinfo); + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + if (nblks != 0) + xfs_trans_mod_dquot(tp, dqp, + flags & XFS_QMOPT_RESBLK_MASK, + nblks); + if (ninos != 0) + xfs_trans_mod_dquot(tp, dqp, + XFS_TRANS_DQ_RES_INOS, + ninos); + } + ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount)); + ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount)); + ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount)); + + xfs_dqunlock(dqp); + return 0; + +error_return: + xfs_dqunlock(dqp); + if (flags & XFS_QMOPT_ENOSPC) + return ENOSPC; + return EDQUOT; +} + + +/* + * Given dquot(s), make disk block and/or inode reservations against them. + * The fact that this does the reservation against both the usr and + * grp/prj quotas is important, because this follows a both-or-nothing + * approach. + * + * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown. + * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota. + * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks + * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks + * dquots are unlocked on return, if they were not locked by caller. + */ +int +xfs_trans_reserve_quota_bydquots( + xfs_trans_t *tp, + xfs_mount_t *mp, + xfs_dquot_t *udqp, + xfs_dquot_t *gdqp, + long nblks, + long ninos, + uint flags) +{ + int resvd = 0, error; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + + if (tp && tp->t_dqinfo == NULL) + xfs_trans_alloc_dqinfo(tp); + + ASSERT(flags & XFS_QMOPT_RESBLK_MASK); + + if (udqp) { + error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos, + (flags & ~XFS_QMOPT_ENOSPC)); + if (error) + return error; + resvd = 1; + } + + if (gdqp) { + error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags); + if (error) { + /* + * can't do it, so backout previous reservation + */ + if (resvd) { + flags |= XFS_QMOPT_FORCE_RES; + xfs_trans_dqresv(tp, mp, udqp, + -nblks, -ninos, flags); + } + return error; + } + } + + /* + * Didn't change anything critical, so, no need to log + */ + return 0; +} + + +/* + * Lock the dquot and change the reservation if we can. + * This doesn't change the actual usage, just the reservation. + * The inode sent in is locked. + */ +int +xfs_trans_reserve_quota_nblks( + struct xfs_trans *tp, + struct xfs_inode *ip, + long nblks, + long ninos, + uint flags) +{ + struct xfs_mount *mp = ip->i_mount; + + if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) + return 0; + if (XFS_IS_PQUOTA_ON(mp)) + flags |= XFS_QMOPT_ENOSPC; + + ASSERT(ip->i_ino != mp->m_sb.sb_uquotino); + ASSERT(ip->i_ino != mp->m_sb.sb_gquotino); + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_RTBLKS || + (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) == + XFS_TRANS_DQ_RES_BLKS); + + /* + * Reserve nblks against these dquots, with trans as the mediator. + */ + return xfs_trans_reserve_quota_bydquots(tp, mp, + ip->i_udquot, ip->i_gdquot, + nblks, ninos, flags); +} + +/* + * This routine is called to allocate a quotaoff log item. + */ +xfs_qoff_logitem_t * +xfs_trans_get_qoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *startqoff, + uint flags) +{ + xfs_qoff_logitem_t *q; + + ASSERT(tp != NULL); + + q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags); + ASSERT(q != NULL); + + /* + * Get a log_item_desc to point at the new item. + */ + xfs_trans_add_item(tp, &q->qql_item); + return q; +} + + +/* + * This is called to mark the quotaoff logitem as needing + * to be logged when the transaction is committed. The logitem must + * already be associated with the given transaction. + */ +void +xfs_trans_log_quotaoff_item( + xfs_trans_t *tp, + xfs_qoff_logitem_t *qlp) +{ + tp->t_flags |= XFS_TRANS_DIRTY; + qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY; +} + +STATIC void +xfs_trans_alloc_dqinfo( + xfs_trans_t *tp) +{ + tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP); +} + +void +xfs_trans_free_dqinfo( + xfs_trans_t *tp) +{ + if (!tp->t_dqinfo) + return; + kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo); + tp->t_dqinfo = NULL; +} diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h new file mode 100644 index 000000000000..7c220b4227bc --- /dev/null +++ b/fs/xfs/xfs_vnode.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_VNODE_H__ +#define __XFS_VNODE_H__ + +#include "xfs_fs.h" + +struct file; +struct xfs_inode; +struct xfs_iomap; +struct attrlist_cursor_kern; + +/* + * Return values for xfs_inactive. A return value of + * VN_INACTIVE_NOCACHE implies that the file system behavior + * has disassociated its state and bhv_desc_t from the vnode. + */ +#define VN_INACTIVE_CACHE 0 +#define VN_INACTIVE_NOCACHE 1 + +/* + * Flags for read/write calls - same values as IRIX + */ +#define IO_ISDIRECT 0x00004 /* bypass page cache */ +#define IO_INVIS 0x00020 /* don't update inode timestamps */ + +#define XFS_IO_FLAGS \ + { IO_ISDIRECT, "DIRECT" }, \ + { IO_INVIS, "INVIS"} + +/* + * Flush/Invalidate options for vop_toss/flush/flushinval_pages. + */ +#define FI_NONE 0 /* none */ +#define FI_REMAPF 1 /* Do a remapf prior to the operation */ +#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation. + Prevent VM access to the pages until + the operation completes. */ + +/* + * Some useful predicates. + */ +#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) +#define VN_CACHED(vp) (vp->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ + PAGECACHE_TAG_DIRTY) + + +#endif /* __XFS_VNODE_H__ */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c new file mode 100644 index 000000000000..87d3e03878c8 --- /dev/null +++ b/fs/xfs/xfs_xattr.c @@ -0,0 +1,241 @@ +/* + * Copyright (C) 2008 Christoph Hellwig. + * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_acl.h" +#include "xfs_vnodeops.h" + +#include +#include + + +static int +xfs_xattr_get(struct dentry *dentry, const char *name, + void *value, size_t size, int xflags) +{ + struct xfs_inode *ip = XFS_I(dentry->d_inode); + int error, asize = size; + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (!size) { + xflags |= ATTR_KERNOVAL; + value = NULL; + } + + error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags); + if (error) + return error; + return asize; +} + +static int +xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags, int xflags) +{ + struct xfs_inode *ip = XFS_I(dentry->d_inode); + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (flags & XATTR_CREATE) + xflags |= ATTR_CREATE; + if (flags & XATTR_REPLACE) + xflags |= ATTR_REPLACE; + + if (!value) + return -xfs_attr_remove(ip, (unsigned char *)name, xflags); + return -xfs_attr_set(ip, (unsigned char *)name, + (void *)value, size, xflags); +} + +static const struct xattr_handler xfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = 0, /* no flags implies user namespace */ + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +static const struct xattr_handler xfs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = ATTR_ROOT, + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +static const struct xattr_handler xfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = ATTR_SECURE, + .get = xfs_xattr_get, + .set = xfs_xattr_set, +}; + +const struct xattr_handler *xfs_xattr_handlers[] = { + &xfs_xattr_user_handler, + &xfs_xattr_trusted_handler, + &xfs_xattr_security_handler, +#ifdef CONFIG_XFS_POSIX_ACL + &xfs_xattr_acl_access_handler, + &xfs_xattr_acl_default_handler, +#endif + NULL +}; + +static unsigned int xfs_xattr_prefix_len(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return sizeof("security"); + else if (flags & XFS_ATTR_ROOT) + return sizeof("trusted"); + else + return sizeof("user"); +} + +static const char *xfs_xattr_prefix(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return xfs_xattr_security_handler.prefix; + else if (flags & XFS_ATTR_ROOT) + return xfs_xattr_trusted_handler.prefix; + else + return xfs_xattr_user_handler.prefix; +} + +static int +xfs_xattr_put_listent( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) +{ + unsigned int prefix_len = xfs_xattr_prefix_len(flags); + char *offset; + int arraytop; + + ASSERT(context->count >= 0); + + /* + * Only show root namespace entries if we are actually allowed to + * see them. + */ + if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN)) + return 0; + + arraytop = context->count + prefix_len + namelen + 1; + if (arraytop > context->firstu) { + context->count = -1; /* insufficient space */ + return 1; + } + offset = (char *)context->alist + context->count; + strncpy(offset, xfs_xattr_prefix(flags), prefix_len); + offset += prefix_len; + strncpy(offset, (char *)name, namelen); /* real name */ + offset += namelen; + *offset = '\0'; + context->count += prefix_len + namelen + 1; + return 0; +} + +static int +xfs_xattr_put_listent_sizes( + struct xfs_attr_list_context *context, + int flags, + unsigned char *name, + int namelen, + int valuelen, + unsigned char *value) +{ + context->count += xfs_xattr_prefix_len(flags) + namelen + 1; + return 0; +} + +static int +list_one_attr(const char *name, const size_t len, void *data, + size_t size, ssize_t *result) +{ + char *p = data + *result; + + *result += len; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct xfs_attr_list_context context; + struct attrlist_cursor_kern cursor = { 0 }; + struct inode *inode = dentry->d_inode; + int error; + + /* + * First read the regular on-disk attributes. + */ + memset(&context, 0, sizeof(context)); + context.dp = XFS_I(inode); + context.cursor = &cursor; + context.resynch = 1; + context.alist = data; + context.bufsize = size; + context.firstu = context.bufsize; + + if (size) + context.put_listent = xfs_xattr_put_listent; + else + context.put_listent = xfs_xattr_put_listent_sizes; + + xfs_attr_list_int(&context); + if (context.count < 0) + return -ERANGE; + + /* + * Then add the two synthetic ACL attributes. + */ + if (posix_acl_access_exists(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_ACCESS, + strlen(POSIX_ACL_XATTR_ACCESS) + 1, + data, size, &context.count); + if (error) + return error; + } + + if (posix_acl_default_exists(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, + strlen(POSIX_ACL_XATTR_DEFAULT) + 1, + data, size, &context.count); + if (error) + return error; + } + + return context.count; +} diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index 3b8e028b9601..e8bffbe2ba4b 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -1,6 +1,6 @@ #include #include -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include #include #include diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4e4932a7b360..362da653813d 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -1,6 +1,6 @@ #include #include -#include "../fs/xfs/linux-2.6/xfs_sysctl.h" +#include "../fs/xfs/xfs_sysctl.h" #include #include #include -- cgit v1.2.3 From aaff12039ffd812d0c8bbff50b87b6f1f09bec3e Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Sun, 7 Aug 2011 15:20:18 +0200 Subject: firewire: core: handle ack_busy when fetching the Config ROM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some older Panasonic made camcorders (Panasonic AG-EZ30 and NV-DX110, Grundig Scenos DLC 2000) reject requests with ack_busy_X if a request is sent immediately after they sent a response to a prior transaction. This causes firewire-core to fail probing of the camcorder with "giving up on config rom for node id ...". Consequently, programs like kino or dvgrab are unaware of the presence of a camcorder. Such transaction failures happen also with the ieee1394 driver stack (of the 2.4...2.6 kernel series until 2.6.36 inclusive) but with a lower likelihood, such that kino or dvgrab are generally able to use these camcorders via the older driver stack. The cause for firewire-ohci's or firewire-core's worse behavior is not yet known. Gap count optimization in firewire-core is not the cause. Perhaps the slightly higher latency of transaction completion in the older stack plays a role. (ieee1394: AR-resp DMA context tasklet -> packet completion ktread -> user process; firewire-core: tasklet -> user process.) This change introduces retries and delays after ack_busy_X into firewire-core's Config ROM reader, such that at least firewire-core's probing and /dev/fw* creation are successful. This still leaves the problem that userland processes are facing transaction failures. gscanbus's built-in retry routines deal with them successfully, but neither kino's nor dvgrab's do ever succeed. But at least DV capture with "dvgrab -noavc -card 0" works now. Live video preview in kino works too, but not actual capture. One way to prevent Configuration ROM reading failures in application programs is to modify libraw1394 to synthesize read responses by means of firewire-core's Configuration ROM cache. This would only leave CMP and FCP transaction failures as a potential problem source for applications. Reported-and-tested-by: Thomas Seilund Reported-and-tested-by: René Fritz Signed-off-by: Stefan Richter --- drivers/firewire/core-device.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/firewire/core-device.c b/drivers/firewire/core-device.c index 95a471401892..9f661e069318 100644 --- a/drivers/firewire/core-device.c +++ b/drivers/firewire/core-device.c @@ -455,15 +455,20 @@ static struct device_attribute fw_device_attributes[] = { static int read_rom(struct fw_device *device, int generation, int index, u32 *data) { - int rcode; + u64 offset = (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4; + int i, rcode; /* device->node_id, accessed below, must not be older than generation */ smp_rmb(); - rcode = fw_run_transaction(device->card, TCODE_READ_QUADLET_REQUEST, - device->node_id, generation, device->max_speed, - (CSR_REGISTER_BASE | CSR_CONFIG_ROM) + index * 4, - data, 4); + for (i = 10; i < 100; i += 10) { + rcode = fw_run_transaction(device->card, + TCODE_READ_QUADLET_REQUEST, device->node_id, + generation, device->max_speed, offset, data, 4); + if (rcode != RCODE_BUSY) + break; + msleep(i); + } be32_to_cpus(data); return rcode; -- cgit v1.2.3 From 441c850857148935babe000fc2ba1455fe54a6a9 Mon Sep 17 00:00:00 2001 From: Curt Wohlgemuth Date: Sat, 13 Aug 2011 11:25:18 -0400 Subject: ext4: Fix ext4_should_writeback_data() for no-journal mode ext4_should_writeback_data() had an incorrect sequence of tests to determine if it should return 0 or 1: in particular, even in no-journal mode, 0 was being returned for a non-regular-file inode. This meant that, in non-journal mode, we would use ext4_journalled_aops for directories, symlinks, and other non-regular files. However, calling journalled aop callbacks when there is no valid handle, can cause problems. This would cause a kernel crash with Jan Kara's commit 2d859db3e4 ("ext4: fix data corruption in inodes with journalled data"), because we now dereference 'handle' in ext4_journalled_write_end(). I also added BUG_ONs to check for a valid handle in the obviously journal-only aops callbacks. I tested this running xfstests with a scratch device in these modes: - no-journal - data=ordered - data=writeback - data=journal All work fine; the data=journal run has many failures and a crash in xfstests 074, but this is no different from a vanilla kernel. Signed-off-by: Curt Wohlgemuth Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/ext4_jbd2.h | 4 ++-- fs/ext4/inode.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index bb85757689b6..5802fa1dab18 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -289,10 +289,10 @@ static inline int ext4_should_order_data(struct inode *inode) static inline int ext4_should_writeback_data(struct inode *inode) { - if (!S_ISREG(inode->i_mode)) - return 0; if (EXT4_JOURNAL(inode) == NULL) return 1; + if (!S_ISREG(inode->i_mode)) + return 0; if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA)) return 0; if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d47264cafee0..ad3a7ca21069 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -983,6 +983,8 @@ static int ext4_journalled_write_end(struct file *file, from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; + BUG_ON(!ext4_handle_valid(handle)); + if (copied < len) { if (!PageUptodate(page)) copied = 0; @@ -1699,6 +1701,8 @@ static int __ext4_journalled_writepage(struct page *page, goto out; } + BUG_ON(!ext4_handle_valid(handle)); + ret = walk_page_buffers(handle, page_bufs, 0, len, NULL, do_journal_get_write_access); -- cgit v1.2.3 From 2581fdc810889fdea97689cb62481201d579c796 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Sat, 13 Aug 2011 12:17:13 -0400 Subject: ext4: call ext4_ioend_wait and ext4_flush_completed_IO in ext4_evict_inode Flush inode's i_completed_io_list before calling ext4_io_wait to prevent the following deadlock scenario: A page fault happens while some process is writing inode A. During page fault, shrink_icache_memory is called that in turn evicts another inode B. Inode B has some pending io_end work so it calls ext4_ioend_wait() that waits for inode B's i_ioend_count to become zero. However, inode B's ioend work was queued behind some of inode A's ioend work on the same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten thread on that cpu is processing inode A's ioend work, it tries to grab inode A's i_mutex lock. Since the i_mutex lock of inode A is still hold before the page fault happened, we enter a deadlock. Also moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode(). During inode deleteion, ext4_evict_inode() is called before ext4_destroy_inode() and in ext4_evict_inode(), we may call ext4_truncate() without holding i_mutex lock. As a result, there is a race between flush_completed_IO that is called from ext4_ext_truncate() and ext4_end_io_work, which may cause corruption on an io_end structure. This change moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode() to resolve the race between ext4_truncate() and ext4_end_io_work during inode deletion. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 6 ++++++ fs/ext4/super.c | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ad3a7ca21069..7dd698107822 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode) int err; trace_ext4_evict_inode(inode); + + mutex_lock(&inode->i_mutex); + ext4_flush_completed_IO(inode); + mutex_unlock(&inode->i_mutex); + ext4_ioend_wait(inode); + if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4687fea0c00f..44d0c8db2239 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) static void ext4_destroy_inode(struct inode *inode) { - ext4_ioend_wait(inode); if (!list_empty(&(EXT4_I(inode)->i_orphan))) { ext4_msg(inode->i_sb, KERN_ERR, "Inode %lu (%p): orphan list check failed!", -- cgit v1.2.3 From 32c80b32c053dc52712dedac5e4d0aa7c93fc353 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sat, 13 Aug 2011 12:30:59 -0400 Subject: ext4: Resolve the hang of direct i/o read in handling EXT4_IO_END_UNWRITTEN. EXT4_IO_END_UNWRITTEN flag set and the increase of i_aiodio_unwritten should be done simultaneously since ext4_end_io_nolock always clear the flag and decrease the counter in the same time. We don't increase i_aiodio_unwritten when setting EXT4_IO_END_UNWRITTEN so it will go nagative and causes some process to wait forever. Part of the patch came from Eric in his e-mail, but it doesn't fix the problem met by Michael actually. http://marc.info/?l=linux-ext4&m=131316851417460&w=2 Reported-and-Tested-by: Michael Tokarev Signed-off-by: Eric Sandeen Signed-off-by: Tao Ma Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 9 ++++++++- fs/ext4/page-io.c | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7dd698107822..762e8037c888 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2678,8 +2678,15 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) goto out; } - io_end->flag = EXT4_IO_END_UNWRITTEN; + /* + * It may be over-defensive here to check EXT4_IO_END_UNWRITTEN now, + * but being more careful is always safe for the future change. + */ inode = io_end->inode; + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } /* Add the io_end to per-inode completed io list*/ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 430c401d0895..78839af7ce29 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -334,8 +334,10 @@ submit_and_retry: if ((io_end->num_io_pages >= MAX_IO_PAGES) && (io_end->pages[io_end->num_io_pages-1] != io_page)) goto submit_and_retry; - if (buffer_uninit(bh)) - io->io_end->flag |= EXT4_IO_END_UNWRITTEN; + if (buffer_uninit(bh) && !(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + io_end->flag |= EXT4_IO_END_UNWRITTEN; + atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten); + } io->io_end->size += bh->b_size; io->io_next_block++; ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); -- cgit v1.2.3 From 9dd75f1f1a02d656a11a7b9b9e6c2759b9c1e946 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 13 Aug 2011 12:58:21 -0400 Subject: ext4: fix nomblk_io_submit option so it correctly converts uninit blocks Bug discovered by Jan Kara: Finally, commit 1449032be17abb69116dbc393f67ceb8bd034f92 returned back the old IO submission code but apparently it forgot to return the old handling of uninitialized buffers so we unconditionnaly call block_write_full_page() without specifying end_io function. So AFAICS we never convert unwritten extents to written in some cases. For example when I mount the fs as: mount -t ext4 -o nomblk_io_submit,dioread_nolock /dev/ubdb /mnt and do int fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0600); char buf[1024]; memset(buf, 'a', sizeof(buf)); fallocate(fd, 0, 0, 16384); write(fd, buf, sizeof(buf)); I get a file full of zeros (after remounting the filesystem so that pagecache is dropped) instead of seeing the first KB contain 'a's. Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 762e8037c888..c4da98a959ae 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1291,7 +1291,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd, else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT)) err = ext4_bio_write_page(&io_submit, page, len, mpd->wbc); - else + else if (buffer_uninit(page_bufs)) { + ext4_set_bh_endio(page_bufs, inode); + err = block_write_full_page_endio(page, + noalloc_get_block_write, + mpd->wbc, ext4_end_io_buffer_write); + } else err = block_write_full_page(page, noalloc_get_block_write, mpd->wbc); -- cgit v1.2.3 From 4eb60d869fdad7acd098b53bfd1863c311d8933d Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 13 Aug 2011 09:02:43 -0700 Subject: Revert "iwlagn: sysfs couldn't find the priv pointer" This reverts commit cc1a93e68f6c0d736b771f0746e8e4186f483fdc. This fix introduced a bug: bad pointer in unload. Signed-off-by: Emmanuel Grumbach Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-pci.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c index 69d4ec467dca..fb7e436b40c7 100644 --- a/drivers/net/wireless/iwlwifi/iwl-pci.c +++ b/drivers/net/wireless/iwlwifi/iwl-pci.c @@ -134,7 +134,6 @@ static void iwl_pci_apm_config(struct iwl_bus *bus) static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) { bus->drv_data = drv_data; - pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data); } static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], @@ -455,6 +454,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); } + pci_set_drvdata(pdev, bus); + bus->dev = &pdev->dev; bus->irq = pdev->irq; bus->ops = &pci_ops; @@ -493,12 +494,11 @@ static void iwl_pci_down(struct iwl_bus *bus) static void __devexit iwl_pci_remove(struct pci_dev *pdev) { - struct iwl_priv *priv = pci_get_drvdata(pdev); - void *bus_specific = priv->bus->bus_specific; + struct iwl_bus *bus = pci_get_drvdata(pdev); - iwl_remove(priv); + iwl_remove(bus->drv_data); - iwl_pci_down(bus_specific); + iwl_pci_down(bus); } #ifdef CONFIG_PM @@ -506,20 +506,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev) static int iwl_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. */ - return iwl_suspend(priv); + return iwl_suspend(bus->drv_data); } static int iwl_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -532,7 +532,7 @@ static int iwl_pci_resume(struct device *device) */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); - return iwl_resume(priv); + return iwl_resume(bus->drv_data); } static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); -- cgit v1.2.3 From 16a9d06c753abc44f66f88e03bbecb3f1e45d71b Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 13 Aug 2011 09:02:44 -0700 Subject: iwlagn: sysfs couldn't find the priv pointer This bug has been introduced by: d593411084a56124aa9d80aafa15db8463b2d8f7 Author: Emmanuel Grumbach Date: Mon Jul 11 10:48:51 2011 +0300 iwlagn: simplify the bus architecture Revert part of the buggy patch: dev_get_drvdata will now return iwl_priv as it did before the patch. Signed-off-by: Emmanuel Grumbach Signed-off-by: Wey-Yi Guy Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-pci.c | 39 +++++++++++++++------------------- 1 file changed, 17 insertions(+), 22 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-pci.c b/drivers/net/wireless/iwlwifi/iwl-pci.c index fb7e436b40c7..2fdbffa079c1 100644 --- a/drivers/net/wireless/iwlwifi/iwl-pci.c +++ b/drivers/net/wireless/iwlwifi/iwl-pci.c @@ -134,6 +134,7 @@ static void iwl_pci_apm_config(struct iwl_bus *bus) static void iwl_pci_set_drv_data(struct iwl_bus *bus, void *drv_data) { bus->drv_data = drv_data; + pci_set_drvdata(IWL_BUS_GET_PCI_DEV(bus), drv_data); } static void iwl_pci_get_hw_id(struct iwl_bus *bus, char buf[], @@ -454,8 +455,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_write_config_word(pdev, PCI_COMMAND, pci_cmd); } - pci_set_drvdata(pdev, bus); - bus->dev = &pdev->dev; bus->irq = pdev->irq; bus->ops = &pci_ops; @@ -479,26 +478,22 @@ out_no_pci: return err; } -static void iwl_pci_down(struct iwl_bus *bus) -{ - struct iwl_pci_bus *pci_bus = (struct iwl_pci_bus *) bus->bus_specific; - - pci_disable_msi(pci_bus->pci_dev); - pci_iounmap(pci_bus->pci_dev, pci_bus->hw_base); - pci_release_regions(pci_bus->pci_dev); - pci_disable_device(pci_bus->pci_dev); - pci_set_drvdata(pci_bus->pci_dev, NULL); - - kfree(bus); -} - static void __devexit iwl_pci_remove(struct pci_dev *pdev) { - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); + struct iwl_bus *bus = priv->bus; + struct iwl_pci_bus *pci_bus = IWL_BUS_GET_PCI_BUS(bus); + struct pci_dev *pci_dev = IWL_BUS_GET_PCI_DEV(bus); - iwl_remove(bus->drv_data); + iwl_remove(priv); - iwl_pci_down(bus); + pci_disable_msi(pci_dev); + pci_iounmap(pci_dev, pci_bus->hw_base); + pci_release_regions(pci_dev); + pci_disable_device(pci_dev); + pci_set_drvdata(pci_dev, NULL); + + kfree(bus); } #ifdef CONFIG_PM @@ -506,20 +501,20 @@ static void __devexit iwl_pci_remove(struct pci_dev *pdev) static int iwl_pci_suspend(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if * WoWLAN is enabled - don't kill the NIC, someone may need it in Sx. */ - return iwl_suspend(bus->drv_data); + return iwl_suspend(priv); } static int iwl_pci_resume(struct device *device) { struct pci_dev *pdev = to_pci_dev(device); - struct iwl_bus *bus = pci_get_drvdata(pdev); + struct iwl_priv *priv = pci_get_drvdata(pdev); /* Before you put code here, think about WoWLAN. You cannot check here * whether WoWLAN is enabled or not, and your code will run even if @@ -532,7 +527,7 @@ static int iwl_pci_resume(struct device *device) */ pci_write_config_byte(pdev, PCI_CFG_RETRY_TIMEOUT, 0x00); - return iwl_resume(bus->drv_data); + return iwl_resume(priv); } static SIMPLE_DEV_PM_OPS(iwl_dev_pm_ops, iwl_pci_suspend, iwl_pci_resume); -- cgit v1.2.3 From 78869618a886d33d8cdfcb78cf9b245b5250e465 Mon Sep 17 00:00:00 2001 From: Aaron Lu Date: Mon, 11 Jul 2011 13:27:11 +0800 Subject: mmc: sdhci: fix retuning timer wrongly deleted in sdhci_tasklet_finish Currently, the retuning timer for retuning mode 1 will be deleted in function sdhci_tasklet_finish after a mmc request done, which will make retuning timing never trigger again. This patch fixed this problem. Signed-off-by: Aaron Lu Reviewed-by: Philip Rakity Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index c31a3343340d..262985a1a952 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1867,9 +1867,6 @@ static void sdhci_tasklet_finish(unsigned long param) del_timer(&host->timer); - if (host->version >= SDHCI_SPEC_300) - del_timer(&host->tuning_timer); - mrq = host->mrq; /* -- cgit v1.2.3 From 606a15e475880157dd2336f2dc220eacc9eaf36b Mon Sep 17 00:00:00 2001 From: Philip Rakity Date: Mon, 11 Jul 2011 14:47:54 -0700 Subject: mmc: sdhci: pxav3: controller needs 32 bit ADMA addressing Enable the quirk. (Best used in conjunction with patch downgrading ADMA to SDMA when transfer is not aligned.) Signed-off-by: Philip Rakity Acked-by: Zhangfei Gao Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-pxav3.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c index 4198dbbc5c20..fc7e4a515629 100644 --- a/drivers/mmc/host/sdhci-pxav3.c +++ b/drivers/mmc/host/sdhci-pxav3.c @@ -195,7 +195,8 @@ static int __devinit sdhci_pxav3_probe(struct platform_device *pdev) clk_enable(clk); host->quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL - | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC; + | SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC + | SDHCI_QUIRK_32BIT_ADMA_SIZE; /* enable 1/8V DDR capable */ host->mmc->caps |= MMC_CAP_1_8V_DDR; -- cgit v1.2.3 From 7199e2b61d715c5e8901ff32513d2b80db8d3737 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Tue, 12 Jul 2011 17:30:47 +0900 Subject: mmc: sdhci-s3c: add BROKEN_ADMA_ZEROLEN_DESC quirk Samsung SoCs need to set BROKEN_ADMA_ZEROLEN_DESC. (If ADMA operation is more than 65535, maybe set by zero.) Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-s3c.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 460ffaf0f6d7..03da44a1b2ab 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -502,6 +502,9 @@ static int __devinit sdhci_s3c_probe(struct platform_device *pdev) /* This host supports the Auto CMD12 */ host->quirks |= SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12; + /* Samsung SoCs need BROKEN_ADMA_ZEROLEN_DESC */ + host->quirks |= SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC; + if (pdata->cd_type == S3C_SDHCI_CD_NONE || pdata->cd_type == S3C_SDHCI_CD_PERMANENT) host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION; -- cgit v1.2.3 From d5a5bd1c3f7e8d010393530d60df8da75218a488 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 22 Jul 2011 16:13:36 +0300 Subject: mmc: mmc_test: avoid stalled file in debugfs During card removal and inserting cycle the test file in the debugfs could be stalled until the host driver removes it. Let's keep the file in the linked list and destroy it when card is removed. Signed-off-by: Andy Shevchenko Acked-by: Per Forlin Signed-off-by: Chris Ball --- drivers/mmc/card/mmc_test.c | 56 ++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 26 deletions(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 006a5e9f8ab8..742dc98a034c 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -2900,7 +2900,7 @@ static const struct file_operations mmc_test_fops_testlist = { .release = single_release, }; -static void mmc_test_free_file_test(struct mmc_card *card) +static void mmc_test_free_dbgfs_file(struct mmc_card *card) { struct mmc_test_dbgfs_file *df, *dfs; @@ -2917,34 +2917,21 @@ static void mmc_test_free_file_test(struct mmc_card *card) mutex_unlock(&mmc_test_lock); } -static int mmc_test_register_file_test(struct mmc_card *card) +static int __mmc_test_register_dbgfs_file(struct mmc_card *card, + const char *name, mode_t mode, const struct file_operations *fops) { struct dentry *file = NULL; struct mmc_test_dbgfs_file *df; - int ret = 0; - - mutex_lock(&mmc_test_lock); - - if (card->debugfs_root) - file = debugfs_create_file("test", S_IWUSR | S_IRUGO, - card->debugfs_root, card, &mmc_test_fops_test); - - if (IS_ERR_OR_NULL(file)) { - dev_err(&card->dev, - "Can't create test. Perhaps debugfs is disabled.\n"); - ret = -ENODEV; - goto err; - } if (card->debugfs_root) - file = debugfs_create_file("testlist", S_IRUGO, - card->debugfs_root, card, &mmc_test_fops_testlist); + file = debugfs_create_file(name, mode, card->debugfs_root, + card, fops); if (IS_ERR_OR_NULL(file)) { dev_err(&card->dev, - "Can't create testlist. Perhaps debugfs is disabled.\n"); - ret = -ENODEV; - goto err; + "Can't create %s. Perhaps debugfs is disabled.\n", + name); + return -ENODEV; } df = kmalloc(sizeof(struct mmc_test_dbgfs_file), GFP_KERNEL); @@ -2952,14 +2939,31 @@ static int mmc_test_register_file_test(struct mmc_card *card) debugfs_remove(file); dev_err(&card->dev, "Can't allocate memory for internal usage.\n"); - ret = -ENOMEM; - goto err; + return -ENOMEM; } df->card = card; df->file = file; list_add(&df->link, &mmc_test_file_test); + return 0; +} + +static int mmc_test_register_dbgfs_file(struct mmc_card *card) +{ + int ret; + + mutex_lock(&mmc_test_lock); + + ret = __mmc_test_register_dbgfs_file(card, "test", S_IWUSR | S_IRUGO, + &mmc_test_fops_test); + if (ret) + goto err; + + ret = __mmc_test_register_dbgfs_file(card, "testlist", S_IRUGO, + &mmc_test_fops_testlist); + if (ret) + goto err; err: mutex_unlock(&mmc_test_lock); @@ -2974,7 +2978,7 @@ static int mmc_test_probe(struct mmc_card *card) if (!mmc_card_mmc(card) && !mmc_card_sd(card)) return -ENODEV; - ret = mmc_test_register_file_test(card); + ret = mmc_test_register_dbgfs_file(card); if (ret) return ret; @@ -2986,7 +2990,7 @@ static int mmc_test_probe(struct mmc_card *card) static void mmc_test_remove(struct mmc_card *card) { mmc_test_free_result(card); - mmc_test_free_file_test(card); + mmc_test_free_dbgfs_file(card); } static struct mmc_driver mmc_driver = { @@ -3006,7 +3010,7 @@ static void __exit mmc_test_exit(void) { /* Clear stalled data if card is still plugged */ mmc_test_free_result(NULL); - mmc_test_free_file_test(NULL); + mmc_test_free_dbgfs_file(NULL); mmc_unregister_driver(&mmc_driver); } -- cgit v1.2.3 From 38ca285044be88a0fb47b6eb91deeeb729435fd0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Tue, 26 Jul 2011 17:12:37 +0900 Subject: mmc: core: Detect eMMC v4.5 ext_csd entries The eMMC v4.5 Spec is released now: EXT_CSD_REV Extended CSD Revision 255-7 Reserved 6 Revision 1.6 (for MMC v4.5) 5 Revision 1.5 (for MMV v4.41) ... Signed-off-by: Kyungmin Park Signed-off-by: Chris Ball --- drivers/mmc/core/mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index aa7d1d79b8c5..5700b1cbdfec 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -259,7 +259,7 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd) } card->ext_csd.rev = ext_csd[EXT_CSD_REV]; - if (card->ext_csd.rev > 5) { + if (card->ext_csd.rev > 6) { printk(KERN_ERR "%s: unrecognised EXT_CSD revision %d\n", mmc_hostname(card->host), card->ext_csd.rev); err = -EINVAL; -- cgit v1.2.3 From 1ccd4b7bfdcfcc8cc7ffc4a9c11d3ac5b6da8ca0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Thu, 28 Jul 2011 20:55:27 +0200 Subject: mmc: cb710: fix possible pci_dev leak in cb710_pci_configure() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported-by: Julia Lawall Signed-off-by: Michał Mirosław Signed-off-by: Chris Ball --- drivers/misc/cb710/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cb710/core.c b/drivers/misc/cb710/core.c index efec4139c3f6..68cd05b6d829 100644 --- a/drivers/misc/cb710/core.c +++ b/drivers/misc/cb710/core.c @@ -33,7 +33,7 @@ EXPORT_SYMBOL_GPL(cb710_pci_update_config_reg); static int __devinit cb710_pci_configure(struct pci_dev *pdev) { unsigned int devfn = PCI_DEVFN(PCI_SLOT(pdev->devfn), 0); - struct pci_dev *pdev0 = pci_get_slot(pdev->bus, devfn); + struct pci_dev *pdev0; u32 val; cb710_pci_update_config_reg(pdev, 0x48, @@ -43,6 +43,7 @@ static int __devinit cb710_pci_configure(struct pci_dev *pdev) if (val & 0x80000000) return 0; + pdev0 = pci_get_slot(pdev->bus, devfn); if (!pdev0) return -ENODEV; -- cgit v1.2.3 From 9b7bbe1085eb2b0f2d5d81f4116772cb2af497a4 Mon Sep 17 00:00:00 2001 From: Shashidhar Hiremath Date: Fri, 29 Jul 2011 08:49:50 -0400 Subject: mmc: dw_mmc: Fix mask in IDMAC_SET_BUFFER1_SIZE macro The mask used inside this macro was assuming Buffer_Size1's [BS1's] width to be 14 bits, it is actually 13 bits. Modify masks used in IDMAC_SET_BUFFER1_SIZE such that they use only 13 bits instead of current 14. Signed-off-by: Shashidhar Hiremath Acked-by: Will Newton Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 77f0b6b1681d..f13bb49dbc71 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -62,7 +62,7 @@ struct idmac_desc { u32 des1; /* Buffer sizes */ #define IDMAC_SET_BUFFER1_SIZE(d, s) \ - ((d)->des1 = ((d)->des1 & 0x03ffc000) | ((s) & 0x3fff)) + ((d)->des1 = ((d)->des1 & 0x03ffe000) | ((s) & 0x1fff)) u32 des2; /* buffer 1 physical address */ -- cgit v1.2.3 From 55156d240a4d41d47310278c5139e24517f1c65b Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 29 Jul 2011 15:35:00 +0100 Subject: mmc: sdhci-s3c: Fix build for header change A header change has removed an implicit inclusion of module.h, breaking the build due to the use of THIS_MODULE. Fix that. Signed-off-by: Mark Brown Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-s3c.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 03da44a1b2ab..2bd7bf4fece7 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -19,6 +19,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From 0d58864bf3472f8390e0c0a33bd875c7eec868bd Mon Sep 17 00:00:00 2001 From: Tony Lin Date: Thu, 11 Aug 2011 16:45:59 -0400 Subject: mmc: esdhc-imx: fix card interrupt loss on freescale eSDHC Apply a workaround for the imx eSDHC controller to avoid missing card interrupts. This makes SDIO work. Signed-off-by: Tony Lin Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-esdhc-imx.c | 40 +++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 9ebfb4b482f5..0e9780f5a4a9 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -27,6 +27,7 @@ #include "sdhci-pltfm.h" #include "sdhci-esdhc.h" +#define SDHCI_CTRL_D3CD 0x08 /* VENDOR SPEC register */ #define SDHCI_VENDOR_SPEC 0xC0 #define SDHCI_VENDOR_SPEC_SDIO_QUIRK 0x00000002 @@ -141,13 +142,32 @@ static void esdhc_writel_le(struct sdhci_host *host, u32 val, int reg) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = pltfm_host->priv; struct esdhc_platform_data *boarddata = &imx_data->boarddata; - - if (unlikely((reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE) - && (boarddata->cd_type == ESDHC_CD_GPIO))) - /* - * these interrupts won't work with a custom card_detect gpio - */ - val &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT); + u32 data; + + if (unlikely(reg == SDHCI_INT_ENABLE || reg == SDHCI_SIGNAL_ENABLE)) { + if (boarddata->cd_type == ESDHC_CD_GPIO) + /* + * These interrupts won't work with a custom + * card_detect gpio (only applied to mx25/35) + */ + val &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT); + + if (val & SDHCI_INT_CARD_INT) { + /* + * Clear and then set D3CD bit to avoid missing the + * card interrupt. This is a eSDHC controller problem + * so we need to apply the following workaround: clear + * and set D3CD bit will make eSDHC re-sample the card + * interrupt. In case a card interrupt was lost, + * re-sample it by the following steps. + */ + data = readl(host->ioaddr + SDHCI_HOST_CONTROL); + data &= ~SDHCI_CTRL_D3CD; + writel(data, host->ioaddr + SDHCI_HOST_CONTROL); + data |= SDHCI_CTRL_D3CD; + writel(data, host->ioaddr + SDHCI_HOST_CONTROL); + } + } if (unlikely((imx_data->flags & ESDHC_FLAG_MULTIBLK_NO_INT) && (reg == SDHCI_INT_STATUS) @@ -217,8 +237,10 @@ static void esdhc_writeb_le(struct sdhci_host *host, u8 val, int reg) */ return; case SDHCI_HOST_CONTROL: - /* FSL messed up here, so we can just keep those two */ - new_val = val & (SDHCI_CTRL_LED | SDHCI_CTRL_4BITBUS); + /* FSL messed up here, so we can just keep those three */ + new_val = val & (SDHCI_CTRL_LED | \ + SDHCI_CTRL_4BITBUS | \ + SDHCI_CTRL_D3CD); /* ensure the endianess */ new_val |= ESDHC_HOST_CONTROL_LE; /* DMA mode bits are shifted */ -- cgit v1.2.3 From 4906baf080623b4971bdeeac0a9fec5b8885d3ac Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 3 Aug 2011 14:48:58 +0800 Subject: mmc: tmio: eliminate unused variable 'mmc' warning Fix below compile warning: CC drivers/mmc/host/tmio_mmc.o drivers/mmc/host/tmio_mmc.c: In function 'tmio_mmc_suspend': drivers/mmc/host/tmio_mmc.c:30: warning: unused variable 'mmc' drivers/mmc/host/tmio_mmc.c: In function 'tmio_mmc_resume': drivers/mmc/host/tmio_mmc.c:45: warning: unused variable 'mmc' Signed-off-by: Axel Lin Acked-by: Guennadi Liakhovetski Signed-off-by: Chris Ball --- drivers/mmc/host/tmio_mmc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 8d185de90d20..44a9668c4b7a 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -27,7 +27,6 @@ static int tmio_mmc_suspend(struct platform_device *dev, pm_message_t state) { const struct mfd_cell *cell = mfd_get_cell(dev); - struct mmc_host *mmc = platform_get_drvdata(dev); int ret; ret = tmio_mmc_host_suspend(&dev->dev); @@ -42,7 +41,6 @@ static int tmio_mmc_suspend(struct platform_device *dev, pm_message_t state) static int tmio_mmc_resume(struct platform_device *dev) { const struct mfd_cell *cell = mfd_get_cell(dev); - struct mmc_host *mmc = platform_get_drvdata(dev); int ret = 0; /* Tell the MFD core we are ready to be enabled */ -- cgit v1.2.3 From 83cbcd93a1be803ccda53e7acbdc9a937c8f6375 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 3 Aug 2011 18:35:58 +0300 Subject: mmc: Revert "mmc: sdhci: Fix SDHCI_QUIRK_TIMEOUT_USES_SDCLK" This reverts commit 4b01681c7764, which introduced a new potential divide by zero in the process of fixing one. The subsequent commits attempt to fix the issue properly. Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 262985a1a952..11d031b8708c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -632,9 +632,6 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) target_timeout = data->timeout_ns / 1000 + data->timeout_clks / host->clock; - if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) - host->timeout_clk = host->clock / 1000; - /* * Figure out needed cycles. * We do this in steps in order to fit inside a 32 bit int. @@ -645,7 +642,6 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) * => * (1) / (2) > 2^6 */ - BUG_ON(!host->timeout_clk); count = 0; current_timeout = (1 << 13) * 1000 / host->timeout_clk; while (current_timeout < target_timeout) { @@ -2474,6 +2470,9 @@ int sdhci_add_host(struct sdhci_host *host) if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) host->timeout_clk *= 1000; + if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) + host->timeout_clk = host->clock / 1000; + /* * In case of Host Controller v3.00, find out whether clock * multiplier is supported. -- cgit v1.2.3 From 78a2ca2727a9b992901c715bc881b6ddb4ec6a4e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 3 Aug 2011 18:35:59 +0300 Subject: mmc: sdhci: check host->clock before using it as a denominator Sometimes host->clock could be zero which is a legal situation. This patch checks host->clock before usage as a denominator when timeout is calculated. A similar patch is applied for mmc core (see commit e9b8684, "mmc: fix division by zero in MMC core"). Without this patch, the execution of the sdhci_calc_timeout could end up with a backtrace: <0>[ 4.014319] divide error: 0000 [#1] PREEMPT SMP <4>[ 4.014352] Modules linked in: g_ether <4>[ 4.014376] <4>[ 4.014393] Pid: 33, comm: kworker/u:2 Not tainted 3.0.0+ #646 <4>[ 4.014421] EIP: 0060:[] EFLAGS: 00010046 CPU: 1 <4>[ 4.014449] EIP is at sdhci_calc_timeout+0x2e/0x100 <4>[ 4.014468] EAX: 00000000 EBX: f5930fc8 ECX: 00000000 EDX: 00000000 <4>[ 4.014488] ESI: f5291de8 EDI: f5291db8 EBP: f5291c6c ESP: f5291c50 <4>[ 4.014508] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 <0>[ 4.014529] Process kworker/u:2 (pid: 33, ti=f5290000 task=f53065a0 task.ti=f5290000) <0>[ 4.014546] Stack: <4>[ 4.014557] 00000082 c1054fdd f5291c78 04000000 f5930fc8 f5291de8 f5291db8 f5291cac <4>[ 4.014611] c12fab7c c107a98b f5291c88 c13b6d3f f593109c f5882000 f5291cac c1054fdd <4>[ 4.014663] 00000000 00000000 f5882000 00000082 f5930fc8 f5291db8 0000000a f5291ccc <0>[ 4.014716] Call Trace: <4>[ 4.014743] [] ? mod_timer+0x11d/0x380 <4>[ 4.014770] [] sdhci_prepare_data+0x2c/0x3a0 <4>[ 4.014798] [] ? trace_hardirqs_off+0xb/0x10 <4>[ 4.014827] [] ? _raw_spin_unlock_irqrestore+0x2f/0x60 <4>[ 4.014854] [] ? mod_timer+0x11d/0x380 <4>[ 4.014880] [] sdhci_send_command+0xdb/0x210 <4>[ 4.014906] [] sdhci_request+0xc3/0x150 <4>[ 4.014932] [] mmc_start_request+0xda/0x200 <4>[ 4.014960] [] ? __raw_spin_lock_init+0x32/0x60 <4>[ 4.014989] [] ? __init_waitqueue_head+0x35/0x50 <4>[ 4.015015] [] mmc_wait_for_req+0x7b/0x90 <4>[ 4.015045] [] mmc_send_cxd_data+0xf7/0x130 <4>[ 4.015076] [] ? mmc_erase+0x140/0x140 <4>[ 4.015102] [] mmc_send_ext_csd+0x1d/0x20 <4>[ 4.015125] [] mmc_get_ext_csd+0x70/0x140 <4>[ 4.015151] [] mmc_compare_ext_csds+0x28/0x190 <4>[ 4.015176] [] mmc_init_card+0x24f/0x650 <4>[ 4.015201] [] ? _raw_spin_unlock_irqrestore+0x4d/0x60 <4>[ 4.015226] [] ? trace_hardirqs_on_caller+0x11c/0x160 <4>[ 4.015255] [] mmc_attach_mmc+0xa4/0x190 <4>[ 4.015282] [] mmc_rescan+0x210/0x240 <4>[ 4.015311] [] process_one_work+0x176/0x550 <4>[ 4.015336] [] ? process_one_work+0xfa/0x550 <4>[ 4.015360] [] ? mmc_init_erase+0x140/0x140 <4>[ 4.015385] [] worker_thread+0x12a/0x2c0 <4>[ 4.015410] [] ? manage_workers.clone.18+0x100/0x100 <4>[ 4.015437] [] kthread+0x74/0x80 <4>[ 4.015463] [] ? __init_kthread_worker+0x60/0x60 <4>[ 4.015490] [] kernel_thread_helper+0x6/0xd <0>[ 4.015507] Code: 57 89 d7 56 53 89 c3 83 ec 10 8b 40 04 8b 72 28 f6 c4 10 89 45 f0 0f 85 91 00 00 00 85 f6 0f 84 c1 00 00 00 8b 4e 04 31 d2 89 c8 73 58 ba d3 4d 62 10 89 c1 8b 06 f7 e2 c1 ea 06 01 d1 f7 45 <0>[ 4.015829] EIP: [] sdhci_calc_timeout+0x2e/0x100 SS:ESP 0068:f5291c50 Reported-by: Alexander Shishkin Signed-off-by: Andy Shevchenko Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 11d031b8708c..89ba4516cb8c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -628,9 +628,11 @@ static u8 sdhci_calc_timeout(struct sdhci_host *host, struct mmc_command *cmd) /* timeout in us */ if (!data) target_timeout = cmd->cmd_timeout_ms * 1000; - else - target_timeout = data->timeout_ns / 1000 + - data->timeout_clks / host->clock; + else { + target_timeout = data->timeout_ns / 1000; + if (host->clock) + target_timeout += data->timeout_clks / host->clock; + } /* * Figure out needed cycles. -- cgit v1.2.3 From 272308caaa6c0f2b1500a3660b9fa75f17a45cc4 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 3 Aug 2011 18:36:00 +0300 Subject: mmc: sdhci: move timeout_clk calculation farther down This moves the calculation below the assignment of mmc->f_max, which we need for calculating timeout_clk in the next patch in this series. Signed-off-by: Andy Shevchenko Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 89ba4516cb8c..afa26bdcfa46 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2456,25 +2456,6 @@ int sdhci_add_host(struct sdhci_host *host) host->max_clk = host->ops->get_max_clock(host); } - host->timeout_clk = - (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; - if (host->timeout_clk == 0) { - if (host->ops->get_timeout_clock) { - host->timeout_clk = host->ops->get_timeout_clock(host); - } else if (!(host->quirks & - SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) { - printk(KERN_ERR - "%s: Hardware doesn't specify timeout clock " - "frequency.\n", mmc_hostname(mmc)); - return -ENODEV; - } - } - if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) - host->timeout_clk *= 1000; - - if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) - host->timeout_clk = host->clock / 1000; - /* * In case of Host Controller v3.00, find out whether clock * multiplier is supported. @@ -2507,6 +2488,25 @@ int sdhci_add_host(struct sdhci_host *host) } else mmc->f_min = host->max_clk / SDHCI_MAX_DIV_SPEC_200; + host->timeout_clk = + (caps[0] & SDHCI_TIMEOUT_CLK_MASK) >> SDHCI_TIMEOUT_CLK_SHIFT; + if (host->timeout_clk == 0) { + if (host->ops->get_timeout_clock) { + host->timeout_clk = host->ops->get_timeout_clock(host); + } else if (!(host->quirks & + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK)) { + printk(KERN_ERR + "%s: Hardware doesn't specify timeout clock " + "frequency.\n", mmc_hostname(mmc)); + return -ENODEV; + } + } + if (caps[0] & SDHCI_TIMEOUT_CLK_UNIT) + host->timeout_clk *= 1000; + + if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) + host->timeout_clk = host->clock / 1000; + if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000); else -- cgit v1.2.3 From 65be3fef930beb3e282e7f23dfba63289971430c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 3 Aug 2011 18:36:01 +0300 Subject: mmc: sdhci: use f_max instead of host->clock for timeouts When timeout_clk is calculated the host->clock could be zero. So, instead of host->clock the calculation now uses mmc->f_max. Signed-off-by: Andy Shevchenko Cc: Mark Brown Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index afa26bdcfa46..0e02cc1df12e 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2505,12 +2505,9 @@ int sdhci_add_host(struct sdhci_host *host) host->timeout_clk *= 1000; if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) - host->timeout_clk = host->clock / 1000; + host->timeout_clk = mmc->f_max / 1000; - if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK) - mmc->max_discard_to = (1 << 27) / (mmc->f_max / 1000); - else - mmc->max_discard_to = (1 << 27) / host->timeout_clk; + mmc->max_discard_to = (1 << 27) / host->timeout_clk; mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23; -- cgit v1.2.3 From 7435bb7950ba8a3cbfa6d0c01e92588562533a3f Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Wed, 10 Aug 2011 18:46:28 +0900 Subject: mmc: core: use defined R1_STATE_PRG macro for card status Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Chris Ball --- drivers/mmc/card/mmc_test.c | 2 +- drivers/mmc/core/core.c | 2 +- drivers/mmc/core/mmc_ops.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c index 742dc98a034c..2bf229acd3b8 100644 --- a/drivers/mmc/card/mmc_test.c +++ b/drivers/mmc/card/mmc_test.c @@ -224,7 +224,7 @@ static void mmc_test_prepare_mrq(struct mmc_test_card *test, static int mmc_test_busy(struct mmc_command *cmd) { return !(cmd->resp[0] & R1_READY_FOR_DATA) || - (R1_CURRENT_STATE(cmd->resp[0]) == 7); + (R1_CURRENT_STATE(cmd->resp[0]) == R1_STATE_PRG); } /* diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index 89bdeaec7182..91a0a7460ebb 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -1502,7 +1502,7 @@ static int mmc_do_erase(struct mmc_card *card, unsigned int from, goto out; } } while (!(cmd.resp[0] & R1_READY_FOR_DATA) || - R1_CURRENT_STATE(cmd.resp[0]) == 7); + R1_CURRENT_STATE(cmd.resp[0]) == R1_STATE_PRG); out: return err; } diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index 845ce7c533b9..770c3d06f5dc 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -407,7 +407,7 @@ int mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value, break; if (mmc_host_is_spi(card->host)) break; - } while (R1_CURRENT_STATE(status) == 7); + } while (R1_CURRENT_STATE(status) == R1_STATE_PRG); if (mmc_host_is_spi(card->host)) { if (status & R1_SPI_ILLEGAL_COMMAND) -- cgit v1.2.3 From 6daa777866569fc48fe3cfcd6fd01aba37ac06a5 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Fri, 5 Aug 2011 12:35:03 +0900 Subject: mmc: dw_mmc: Fix DDR mode support. Host driver can't get a hint of DDR mode through ios->ddr flag anymore. ios->timing is currently used to inform DDR mode as a substitute. And capability of MMC_CAP_MMC_HIGHSPEED is added for DDR support. Signed-off-by: Seungwon Jeon Acked-by: Will Newton Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index f13bb49dbc71..ff0f714b012c 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -699,7 +699,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } /* DDR mode set */ - if (ios->ddr) { + if (ios->timing == MMC_TIMING_UHS_DDR50) { regs = mci_readl(slot->host, UHS_REG); regs |= (0x1 << slot->id) << 16; mci_writel(slot->host, UHS_REG, regs); @@ -1646,7 +1646,7 @@ static int __init dw_mci_init_slot(struct dw_mci *host, unsigned int id) mmc->caps |= MMC_CAP_4_BIT_DATA; if (host->pdata->quirks & DW_MCI_QUIRK_HIGHSPEED) - mmc->caps |= MMC_CAP_SD_HIGHSPEED; + mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; #ifdef CONFIG_MMC_DW_IDMAC mmc->max_segs = host->ring_size; -- cgit v1.2.3 From 7fd781e8f9b72544a1c7f04456eb33d5ffaed592 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Mon, 8 Aug 2011 18:10:52 +0900 Subject: mmc: remove unused "ddr" parameter in struct mmc_ios "mmc: dw_mmc: Fix DDR mode support" removed the last user. Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0f83858147a6..1d09562ccf73 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -56,8 +56,6 @@ struct mmc_ios { #define MMC_TIMING_UHS_SDR104 4 #define MMC_TIMING_UHS_DDR50 5 - unsigned char ddr; /* dual data rate used */ - #define MMC_SDR_MODE 0 #define MMC_1_2V_DDR_MODE 1 #define MMC_1_8V_DDR_MODE 2 -- cgit v1.2.3 From db12fb833a88c5114d70dcafebd33d460a09d593 Mon Sep 17 00:00:00 2001 From: Zac Storer Date: Sat, 13 Aug 2011 12:34:45 -0700 Subject: Documentation: fix spelling error in SubmittingPatches Fixed a spelling error. Signed-off-by: Zac Storer Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/SubmittingPatches | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches index 569f3532e138..4468ce24427c 100644 --- a/Documentation/SubmittingPatches +++ b/Documentation/SubmittingPatches @@ -303,7 +303,7 @@ patches that are being emailed around. The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to -pass it on as a open-source patch. The rules are pretty simple: if you +pass it on as an open-source patch. The rules are pretty simple: if you can certify the below: Developer's Certificate of Origin 1.1 -- cgit v1.2.3 From 3c8429ad574f2d83878438522f41c003a6cc458e Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Sat, 13 Aug 2011 12:34:47 -0700 Subject: Documentation: drop Linux Source Driver from kernel-docs references Dropping LSD (Linux Source Driver) since it hasn't been available for a long time. Signed-off-by: Luis de Bethencourt Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/kernel-docs.txt | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/Documentation/kernel-docs.txt b/Documentation/kernel-docs.txt index 9a8674629a07..0e0734b509d8 100644 --- a/Documentation/kernel-docs.txt +++ b/Documentation/kernel-docs.txt @@ -620,17 +620,6 @@ (including this document itself) have been moved there, and might be more up to date than the web version. - * Name: "Linux Source Driver" - URL: http://lsd.linux.cz - Keywords: Browsing source code. - Description: "Linux Source Driver (LSD) is an application, which - can make browsing source codes of Linux kernel easier than you can - imagine. You can select between multiple versions of kernel (e.g. - 0.01, 1.0.0, 2.0.33, 2.0.34pre13, 2.0.0, 2.1.101 etc.). With LSD - you can search Linux kernel (fulltext, macros, types, functions - and variables) and LSD can generate patches for you on the fly - (files, directories or kernel)". - * Name: "Linux Kernel Source Reference" Author: Thomas Graichen. URL: http://marc.info/?l=linux-kernel&m=96446640102205&w=4 -- cgit v1.2.3 From ac1667db056a323cb0cb5d75e3bdb820804d46b6 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Sat, 13 Aug 2011 12:34:50 -0700 Subject: Documentation: add ARM user_debug to kernel-parameters.txt Usually kernel parameters are documented in kernel-parameters.txt but user_debug is only documented in the Kconfig. Document the option and point to the Kconfig help text for more info. Signed-off-by: Stephen Boyd Cc: Russell King Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 78926aa2531c..246b132dcc90 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2635,6 +2635,16 @@ bytes respectively. Such letter suffixes can also be entirely omitted. medium is write-protected). Example: quirks=0419:aaf5:rl,0421:0433:rc + user_debug= [KNL,ARM] + Format: + See arch/arm/Kconfig.debug help text. + 1 - undefined instruction events + 2 - system calls + 4 - invalid data aborts + 8 - SIGSEGV faults + 16 - SIGBUS faults + Example: user_debug=31 + userpte= [X86] Flags controlling user PTE allocations. -- cgit v1.2.3 From 1629024668d485d9ee8c5a6c9906b19ffd9a49d9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 13 Aug 2011 12:34:52 -0700 Subject: Documentation: kernel-parameters.txt cleanups General cleanups to kernel-parameters.txt: - add missing $ARCH that are being used/referenced - alphabetize the parameter restrictions list - spell "IA-64" as listed in arch/ia64/Kconfig instead of "IA64" - remove trailing whitespace - use hyphen in 32-bit etc. Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/kernel-parameters.txt | 42 +++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 246b132dcc90..6ca1f5cb71e0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -40,6 +40,7 @@ parameter is applicable: ALSA ALSA sound support is enabled. APIC APIC support is enabled. APM Advanced Power Management support is enabled. + ARM ARM architecture is enabled. AVR32 AVR32 architecture is enabled. AX25 Appropriate AX.25 support is enabled. BLACKFIN Blackfin architecture is enabled. @@ -49,6 +50,7 @@ parameter is applicable: EFI EFI Partitioning (GPT) is enabled EIDE EIDE/ATAPI support is enabled. FB The frame buffer device is enabled. + FTRACE Function tracing enabled. GCOV GCOV profiling is enabled. HW Appropriate hardware is enabled. IA-64 IA-64 architecture is enabled. @@ -69,6 +71,7 @@ parameter is applicable: Documentation/m68k/kernel-options.txt. MCA MCA bus support is enabled. MDA MDA console support is enabled. + MIPS MIPS architecture is enabled. MOUSE Appropriate mouse support is enabled. MSI Message Signaled Interrupts (PCI). MTD MTD (Memory Technology Device) support is enabled. @@ -100,7 +103,6 @@ parameter is applicable: SPARC Sparc architecture is enabled. SWSUSP Software suspend (hibernation) is enabled. SUSPEND System suspend states are enabled. - FTRACE Function tracing enabled. TPM TPM drivers are enabled. TS Appropriate touchscreen support is enabled. UMS USB Mass Storage support is enabled. @@ -115,7 +117,7 @@ parameter is applicable: X86-64 X86-64 architecture is enabled. More X86-64 boot options can be found in Documentation/x86/x86_64/boot-options.txt . - X86 Either 32bit or 64bit x86 (same as X86-32+X86-64) + X86 Either 32-bit or 64-bit x86 (same as X86-32+X86-64) XEN Xen support is enabled In addition, the following text indicates that the option: @@ -376,7 +378,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. atkbd.softrepeat= [HW] Use software keyboard repeat - autotest [IA64] + autotest [IA-64] baycom_epp= [HW,AX25] Format: , @@ -681,8 +683,8 @@ bytes respectively. Such letter suffixes can also be entirely omitted. uart[8250],mmio32,[,options] Start an early, polled-mode console on the 8250/16550 UART at the specified I/O port or MMIO address. - MMIO inter-register address stride is either 8bit (mmio) - or 32bit (mmio32). + MMIO inter-register address stride is either 8-bit + (mmio) or 32-bit (mmio32). The options are the same as for ttyS, above. earlyprintk= [X86,SH,BLACKFIN] @@ -725,7 +727,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. See Documentation/block/as-iosched.txt and Documentation/block/deadline-iosched.txt for details. - elfcorehdr= [IA64,PPC,SH,X86] + elfcorehdr= [IA-64,PPC,SH,X86] Specifies physical address of start of kernel core image elf header. Generally kexec loader will pass this option to capture kernel. @@ -791,7 +793,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. tracer at boot up. function-list is a comma separated list of functions. This list can be changed at run time by the set_ftrace_filter file in the debugfs - tracing directory. + tracing directory. ftrace_notrace=[function-list] [FTRACE] Do not trace the functions specified in @@ -829,7 +831,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. hashdist= [KNL,NUMA] Large hashes allocated during boot are distributed across NUMA nodes. Defaults on - for 64bit NUMA, off otherwise. + for 64-bit NUMA, off otherwise. Format: 0 | 1 (for off | on) hcl= [IA-64] SGI's Hardware Graph compatibility layer @@ -998,10 +1000,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. DMA. forcedac [x86_64] With this option iommu will not optimize to look - for io virtual address below 32 bit forcing dual + for io virtual address below 32-bit forcing dual address cycle on pci bus for cards supporting greater - than 32 bit addressing. The default is to look - for translation below 32 bit and if not available + than 32-bit addressing. The default is to look + for translation below 32-bit and if not available then look in the higher range. strict [Default Off] With this option on every unmap_single operation will @@ -1017,7 +1019,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. off disable Interrupt Remapping nosid disable Source ID checking - inttest= [IA64] + inttest= [IA-64] iomem= Disable strict checking of access to MMIO memory strict regions from userspace. @@ -1034,7 +1036,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nomerge forcesac soft - pt [x86, IA64] + pt [x86, IA-64] io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in @@ -1165,7 +1167,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. kvm-amd.npt= [KVM,AMD] Disable nested paging (virtualized MMU) for all guests. - Default is 1 (enabled) if in 64bit or 32bit-PAE mode + Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. @@ -1202,10 +1204,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. libata.dma=0 Disable all PATA and SATA DMA libata.dma=1 PATA and SATA Disk DMA only libata.dma=2 ATAPI (CDROM) DMA only - libata.dma=4 Compact Flash DMA only + libata.dma=4 Compact Flash DMA only Combinations also work, so libata.dma=3 enables DMA for disks and CDROMs, but not CFs. - + libata.ignore_hpa= [LIBATA] Ignore HPA limit libata.ignore_hpa=0 keep BIOS limits (default) libata.ignore_hpa=1 ignore limits, using full disk @@ -1331,7 +1333,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. ltpc= [NET] Format: ,, - machvec= [IA64] Force the use of a particular machine-vector + machvec= [IA-64] Force the use of a particular machine-vector (machvec) in a generic kernel. Example: machvec=hpzx1_swiotlb @@ -1734,7 +1736,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nointroute [IA-64] - nojitter [IA64] Disables jitter checking for ITC timers. + nojitter [IA-64] Disables jitter checking for ITC timers. no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver @@ -1800,7 +1802,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. nox2apic [X86-64,APIC] Do not enable x2APIC mode. - nptcg= [IA64] Override max number of concurrent global TLB + nptcg= [IA-64] Override max number of concurrent global TLB purges which is reported from either PAL_VM_SUMMARY or SAL PALO. @@ -2077,7 +2079,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Format: { parport | timid | 0 } See also Documentation/parport.txt. - pmtmr= [X86] Manual setup of pmtmr I/O Port. + pmtmr= [X86] Manual setup of pmtmr I/O Port. Override pmtimer IOPort with a hex value. e.g. pmtmr=0x508 -- cgit v1.2.3 From 6989b5bb2f0302d824bfc5a9272e17eef22353cc Mon Sep 17 00:00:00 2001 From: Paul Mcquade Date: Sat, 13 Aug 2011 12:34:54 -0700 Subject: Documentation: email-clients: Add better Thunderbird information Add better Thunderbird information. Add Thunderbird Registry instructions to: Enable UTF8 & Preformat mode Disable HTML mode Signed-off-by: Paul McQuade Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/email-clients.txt | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Documentation/email-clients.txt b/Documentation/email-clients.txt index a0b58e29f911..860c29a472ad 100644 --- a/Documentation/email-clients.txt +++ b/Documentation/email-clients.txt @@ -199,18 +199,16 @@ to coerce it into behaving. To beat some sense out of the internal editor, do this: -- Under account settings, composition and addressing, uncheck "Compose - messages in HTML format". - - Edit your Thunderbird config settings so that it won't use format=flowed. Go to "edit->preferences->advanced->config editor" to bring up the thunderbird's registry editor, and set "mailnews.send_plaintext_flowed" to "false". -- Enable "preformat" mode: Shft-click on the Write icon to bring up the HTML - composer, select "Preformat" from the drop-down box just under the subject - line, then close the message without saving. (This setting also applies to - the text composer, but the only control for it is in the HTML composer.) +- Disable HTML Format: Set "mail.identity.id1.compose_html" to "false". + +- Enable "preformat" mode: Set "editor.quotesPreformatted" to "true". + +- Enable UTF8: Set "prefs.converted-to-utf8" to "true". - Install the "toggle wordwrap" extension. Download the file from: https://addons.mozilla.org/thunderbird/addon/2351/ -- cgit v1.2.3 From 4126dacb5b2ca85b187a27b93805254567526dc8 Mon Sep 17 00:00:00 2001 From: Sergiu Iordache Date: Sat, 13 Aug 2011 12:34:56 -0700 Subject: Documentation: add Ramoops usage description Add a documentation file describing the usage of Ramoops Signed-off-by: Sergiu Iordache Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/00-INDEX | 2 ++ Documentation/ramoops.txt | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 Documentation/ramoops.txt diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 1f89424c36a6..65bbd2622396 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -272,6 +272,8 @@ printk-formats.txt - how to get printk format specifiers right prio_tree.txt - info on radix-priority-search-tree use for indexing vmas. +ramoops.txt + - documentation of the ramoops oops/panic logging module. rbtree.txt - info on what red-black trees are and what they are for. robust-futex-ABI.txt diff --git a/Documentation/ramoops.txt b/Documentation/ramoops.txt new file mode 100644 index 000000000000..8fb1ba7fe7bf --- /dev/null +++ b/Documentation/ramoops.txt @@ -0,0 +1,76 @@ +Ramoops oops/panic logger +========================= + +Sergiu Iordache + +Updated: 8 August 2011 + +0. Introduction + +Ramoops is an oops/panic logger that writes its logs to RAM before the system +crashes. It works by logging oopses and panics in a circular buffer. Ramoops +needs a system with persistent RAM so that the content of that area can +survive after a restart. + +1. Ramoops concepts + +Ramoops uses a predefined memory area to store the dump. The start and size of +the memory area are set using two variables: + * "mem_address" for the start + * "mem_size" for the size. The memory size will be rounded down to a + power of two. + +The memory area is divided into "record_size" chunks (also rounded down to +power of two) and each oops/panic writes a "record_size" chunk of +information. + +Dumping both oopses and panics can be done by setting 1 in the "dump_oops" +variable while setting 0 in that variable dumps only the panics. + +The module uses a counter to record multiple dumps but the counter gets reset +on restart (i.e. new dumps after the restart will overwrite old ones). + +2. Setting the parameters + +Setting the ramoops parameters can be done in 2 different manners: + 1. Use the module parameters (which have the names of the variables described + as before). + 2. Use a platform device and set the platform data. The parameters can then + be set through that platform data. An example of doing that is: + +#include +[...] + +static struct ramoops_platform_data ramoops_data = { + .mem_size = <...>, + .mem_address = <...>, + .record_size = <...>, + .dump_oops = <...>, +}; + +static struct platform_device ramoops_dev = { + .name = "ramoops", + .dev = { + .platform_data = &ramoops_data, + }, +}; + +[... inside a function ...] +int ret; + +ret = platform_device_register(&ramoops_dev); +if (ret) { + printk(KERN_ERR "unable to register platform device\n"); + return ret; +} + +3. Dump format + +The data dump begins with a header, currently defined as "====" followed by a +timestamp and a new line. The dump then continues with the actual data. + +4. Reading the data + +The dump data can be read from memory (through /dev/mem or other means). +Getting the module parameters, which are needed in order to parse the data, can +be done through /sys/module/ramoops/parameters/* . -- cgit v1.2.3 From 399e1d9c22e15c1697d070bb89e6e0da3fae7e14 Mon Sep 17 00:00:00 2001 From: Ralf Thielow Date: Sat, 13 Aug 2011 12:34:57 -0700 Subject: Documentation: SubmittingDrivers: fix Linus's git tree URL Change resource URL to new git tree - (http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git). Signed-off-by: Ralf Thielow Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/SubmittingDrivers | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/SubmittingDrivers b/Documentation/SubmittingDrivers index 319baa8b60dd..36d16bbf72c6 100644 --- a/Documentation/SubmittingDrivers +++ b/Documentation/SubmittingDrivers @@ -130,7 +130,7 @@ Linux kernel master tree: ftp.??.kernel.org:/pub/linux/kernel/... ?? == your country code, such as "us", "uk", "fr", etc. - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git + http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git Linux kernel mailing list: linux-kernel@vger.kernel.org -- cgit v1.2.3 From 4c74916fa81ce5a431350cb27eb9a7c95d3cf3d7 Mon Sep 17 00:00:00 2001 From: Marcos Souza Date: Sat, 13 Aug 2011 12:34:59 -0700 Subject: Documentation: befs.txt: no maintainer, orphaned Remove the name of Sergey Kostyliov as maintainer of befs. In the MAINTAINERS file, befs is orphaned. Signed-off-by: Marcos Souza Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- Documentation/filesystems/befs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/filesystems/befs.txt b/Documentation/filesystems/befs.txt index 6e49c363938e..da45e6c842b8 100644 --- a/Documentation/filesystems/befs.txt +++ b/Documentation/filesystems/befs.txt @@ -27,7 +27,7 @@ His original code can still be found at: Does anyone know of a more current email address for Makoto? He doesn't respond to the address given above... -Current maintainer: Sergey S. Kostyliov +This filesystem doesn't have a maintainer. WHAT IS THIS DRIVER? ================== -- cgit v1.2.3 From a115c72802c37351b6d87dfb62938d2ad440eef4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 4 Aug 2011 13:23:38 +0900 Subject: ASoC: Move WM8962 CLKREG_OVD earlier When the clocking registers are not overriden some of the registers are not writable. Signed-off-by: Mark Brown Acked-by: Liam Girdwood Cc: stable@kernel.org --- sound/soc/codecs/wm8962.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 60d740ebeb5b..28650edfdebb 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2927,10 +2927,6 @@ static int wm8962_set_bias_level(struct snd_soc_codec *codec, WM8962_BIAS_ENA | 0x180); msleep(5); - - snd_soc_update_bits(codec, WM8962_CLOCKING2, - WM8962_CLKREG_OVD, - WM8962_CLKREG_OVD); } /* VMID 2*250k */ @@ -3868,6 +3864,10 @@ static int wm8962_probe(struct snd_soc_codec *codec) */ snd_soc_update_bits(codec, WM8962_CLOCKING2, WM8962_SYSCLK_ENA, 0); + /* Ensure we have soft control over all registers */ + snd_soc_update_bits(codec, WM8962_CLOCKING2, + WM8962_CLKREG_OVD, WM8962_CLKREG_OVD); + regulator_bulk_disable(ARRAY_SIZE(wm8962->supplies), wm8962->supplies); if (pdata) { -- cgit v1.2.3 From fd049755636a8b2cc084e088967dd566467ccebc Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:52:59 +0300 Subject: ASoC: h1940: Fix compilation error due to missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/types.h to fix this compilation error: In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0, from arch/arm/mach-s3c2410/include/mach/gpio.h:27, from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5, from include/linux/gpio.h:18, from sound/soc/samsung/rx1950_uda1380.c:20: arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’: arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function) arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level: arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’ Signed-off-by: Vasily Khoruzhick Acked-by: Jassi Brar Signed-off-by: Mark Brown --- sound/soc/samsung/h1940_uda1380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/h1940_uda1380.c b/sound/soc/samsung/h1940_uda1380.c index 241f55d00660..c6c65892294e 100644 --- a/sound/soc/samsung/h1940_uda1380.c +++ b/sound/soc/samsung/h1940_uda1380.c @@ -13,6 +13,7 @@ * */ +#include #include #include -- cgit v1.2.3 From b8487928f5ca2976e4cb8d329943af849d2b6197 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:53:00 +0300 Subject: ASoC: rx1950: Fix compilation error due to missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/types.h to fix this compilation error: In file included from arch/arm/mach-s3c2410/include/mach/gpio-fns.h:27:0, from arch/arm/mach-s3c2410/include/mach/gpio.h:27, from /home/anarsoul/work/pda-linux/linux-next/arch/arm/include/asm/gpio.h:5, from include/linux/gpio.h:18, from sound/soc/samsung/rx1950_uda1380.c:20: arch/arm/plat-samsung/include/plat/gpio-cfg.h:29:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:30:34: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_drvstr_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:57:2: error: expected specifier-qualifier-list before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:148:47: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:156:24: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s3c_gpio_getpull’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:175:24: error: expected declaration specifiers or ‘...’ before ‘s3c_gpio_pull_t’ arch/arm/plat-samsung/include/plat/gpio-cfg.h: In function ‘s3c_gpio_cfgrange_nopull’: arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: ‘s3c_gpio_pull_t’ undeclared (first use in this function) arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: note: each undeclared identifier is reported only once for each function it appears in arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: expected ‘)’ before numeric constant arch/arm/plat-samsung/include/plat/gpio-cfg.h:180:47: error: too many arguments to function ‘s3c_gpio_cfgall_range’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:174:12: note: declared here arch/arm/plat-samsung/include/plat/gpio-cfg.h: At top level: arch/arm/plat-samsung/include/plat/gpio-cfg.h:199:26: error: expected ‘=’, ‘,’, ‘;’, ‘asm’ or ‘__attribute__’ before ‘s5p_gpio_get_drvstr’ arch/arm/plat-samsung/include/plat/gpio-cfg.h:210:50: error: expected declaration specifiers or ‘...’ before ‘s5p_gpio_drvstr_t’ Signed-off-by: Vasily Khoruzhick Acked-by: Jassi Brar Signed-off-by: Mark Brown --- sound/soc/samsung/rx1950_uda1380.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/rx1950_uda1380.c b/sound/soc/samsung/rx1950_uda1380.c index 1e574a5d440d..bc8c1676459f 100644 --- a/sound/soc/samsung/rx1950_uda1380.c +++ b/sound/soc/samsung/rx1950_uda1380.c @@ -17,6 +17,7 @@ * */ +#include #include #include -- cgit v1.2.3 From 17f2ae7f677f023997e02fd2ebabd90ea2a0390d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 14 Aug 2011 13:34:31 +0200 Subject: PM / Domains: Fix build for CONFIG_PM_RUNTIME unset Function genpd_queue_power_off_work() is not defined for CONFIG_PM_RUNTIME, so pm_genpd_poweroff_unused() causes a build error to happen in that case. Fix the problem by making pm_genpd_poweroff_unused() depend on CONFIG_PM_RUNTIME too. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 30 +++++++++++++++--------------- include/linux/pm_domain.h | 10 +++++++--- kernel/power/Kconfig | 4 ++++ 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e18566a0fedd..1c374579407c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -460,6 +460,21 @@ static int pm_genpd_runtime_resume(struct device *dev) return 0; } +/** + * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. + */ +void pm_genpd_poweroff_unused(void) +{ + struct generic_pm_domain *genpd; + + mutex_lock(&gpd_list_lock); + + list_for_each_entry(genpd, &gpd_list, gpd_list_node) + genpd_queue_power_off_work(genpd); + + mutex_unlock(&gpd_list_lock); +} + #else static inline void genpd_power_off_work_fn(struct work_struct *work) {} @@ -1255,18 +1270,3 @@ void pm_genpd_init(struct generic_pm_domain *genpd, list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); } - -/** - * pm_genpd_poweroff_unused - Power off all PM domains with no devices in use. - */ -void pm_genpd_poweroff_unused(void) -{ - struct generic_pm_domain *genpd; - - mutex_lock(&gpd_list_lock); - - list_for_each_entry(genpd, &gpd_list, gpd_list_node) - genpd_queue_power_off_work(genpd); - - mutex_unlock(&gpd_list_lock); -} diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 21097cb086fe..f9ec1736a116 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -72,8 +72,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern void pm_genpd_poweroff_unused(void); -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); #else static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev) @@ -101,8 +99,14 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline void pm_genpd_poweroff_unused(void) {} +#endif + +#ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME +extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); +extern void pm_genpd_poweroff_unused(void); +#else static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} +static inline void pm_genpd_poweroff_unused(void) {} #endif #endif /* _LINUX_PM_DOMAIN_H */ diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index b1914cb9095c..3744c594b19b 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -231,3 +231,7 @@ config PM_CLK config PM_GENERIC_DOMAINS bool depends on PM + +config PM_GENERIC_DOMAINS_RUNTIME + def_bool y + depends on PM_RUNTIME && PM_GENERIC_DOMAINS -- cgit v1.2.3 From da6094ea7d3c2295473d8f5134279307255d6ebf Mon Sep 17 00:00:00 2001 From: Daniel Mack Date: Sun, 14 Aug 2011 11:31:16 +0200 Subject: ALSA: snd_usb_caiaq: track submitted output urbs The snd_usb_caiaq driver currently assumes that output urbs are serviced in time and doesn't track when and whether they are given back by the USB core. That usually works fine, but due to temporary limitations of the XHCI stack, we faced that urbs were submitted more than once with this approach. As it's no good practice to fire and forget urbs anyway, this patch introduces a proper bit mask to track which requests have been submitted and given back. That alone however doesn't make the driver work in case the host controller is broken and doesn't give back urbs at all, and the output stream will stop once all pre-allocated output urbs are consumed. But it does prevent crashes of the controller stack in such cases. See http://bugzilla.kernel.org/show_bug.cgi?id=40702 for more details. Signed-off-by: Daniel Mack Reported-and-tested-by: Matej Laitl Cc: Sarah Sharp Cc: stable@kernel.org Signed-off-by: Takashi Iwai --- sound/usb/caiaq/audio.c | 31 +++++++++++++++++++++++++++---- sound/usb/caiaq/device.h | 1 + 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/sound/usb/caiaq/audio.c b/sound/usb/caiaq/audio.c index aa52b3e13bb5..2cf87f5afed4 100644 --- a/sound/usb/caiaq/audio.c +++ b/sound/usb/caiaq/audio.c @@ -139,8 +139,12 @@ static void stream_stop(struct snd_usb_caiaqdev *dev) for (i = 0; i < N_URBS; i++) { usb_kill_urb(dev->data_urbs_in[i]); - usb_kill_urb(dev->data_urbs_out[i]); + + if (test_bit(i, &dev->outurb_active_mask)) + usb_kill_urb(dev->data_urbs_out[i]); } + + dev->outurb_active_mask = 0; } static int snd_usb_caiaq_substream_open(struct snd_pcm_substream *substream) @@ -612,8 +616,8 @@ static void read_completed(struct urb *urb) { struct snd_usb_caiaq_cb_info *info = urb->context; struct snd_usb_caiaqdev *dev; - struct urb *out; - int frame, len, send_it = 0, outframe = 0; + struct urb *out = NULL; + int i, frame, len, send_it = 0, outframe = 0; size_t offset = 0; if (urb->status || !info) @@ -624,7 +628,17 @@ static void read_completed(struct urb *urb) if (!dev->streaming) return; - out = dev->data_urbs_out[info->index]; + /* find an unused output urb that is unused */ + for (i = 0; i < N_URBS; i++) + if (test_and_set_bit(i, &dev->outurb_active_mask) == 0) { + out = dev->data_urbs_out[i]; + break; + } + + if (!out) { + log("Unable to find an output urb to use\n"); + goto requeue; + } /* read the recently received packet and send back one which has * the same layout */ @@ -655,8 +669,12 @@ static void read_completed(struct urb *urb) out->number_of_packets = outframe; out->transfer_flags = URB_ISO_ASAP; usb_submit_urb(out, GFP_ATOMIC); + } else { + struct snd_usb_caiaq_cb_info *oinfo = out->context; + clear_bit(oinfo->index, &dev->outurb_active_mask); } +requeue: /* re-submit inbound urb */ for (frame = 0; frame < FRAMES_PER_URB; frame++) { urb->iso_frame_desc[frame].offset = BYTES_PER_FRAME * frame; @@ -678,6 +696,8 @@ static void write_completed(struct urb *urb) dev->output_running = 1; wake_up(&dev->prepare_wait_queue); } + + clear_bit(info->index, &dev->outurb_active_mask); } static struct urb **alloc_urbs(struct snd_usb_caiaqdev *dev, int dir, int *ret) @@ -829,6 +849,9 @@ int snd_usb_caiaq_audio_init(struct snd_usb_caiaqdev *dev) if (!dev->data_cb_info) return -ENOMEM; + dev->outurb_active_mask = 0; + BUILD_BUG_ON(N_URBS > (sizeof(dev->outurb_active_mask) * 8)); + for (i = 0; i < N_URBS; i++) { dev->data_cb_info[i].dev = dev; dev->data_cb_info[i].index = i; diff --git a/sound/usb/caiaq/device.h b/sound/usb/caiaq/device.h index b2b310194ffa..3f9c6339ae90 100644 --- a/sound/usb/caiaq/device.h +++ b/sound/usb/caiaq/device.h @@ -96,6 +96,7 @@ struct snd_usb_caiaqdev { int input_panic, output_panic, warned; char *audio_in_buf, *audio_out_buf; unsigned int samplerates, bpp; + unsigned long outurb_active_mask; struct snd_pcm_substream *sub_playback[MAX_STREAMS]; struct snd_pcm_substream *sub_capture[MAX_STREAMS]; -- cgit v1.2.3 From f982f91516fa4cfd9d20518833cd04ad714585be Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 21 Jun 2011 22:09:50 +0200 Subject: mm: fix wrong vmap address calculations with odd NR_CPUS values Commit db64fe02258f ("mm: rewrite vmap layer") introduced code that does address calculations under the assumption that VMAP_BLOCK_SIZE is a power of two. However, this might not be true if CONFIG_NR_CPUS is not set to a power of two. Wrong vmap_block index/offset values could lead to memory corruption. However, this has never been observed in practice (or never been diagnosed correctly); what caught this was the BUG_ON in vb_alloc() that checks for inconsistent vmap_block indices. To fix this, ensure that VMAP_BLOCK_SIZE always is a power of two. BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=31572 Reported-by: Pavel Kysilka Reported-by: Matias A. Fonzo Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter Cc: Nick Piggin Cc: Jeremy Fitzhardinge Cc: Krzysztof Helt Cc: Andrew Morton Cc: 2.6.28+ Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 464621d18eb2..7ef0903058ee 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -725,9 +725,10 @@ static void free_unmap_vmap_area_addr(unsigned long addr) #define VMAP_BBMAP_BITS_MIN (VMAP_MAX_ALLOC*2) #define VMAP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* can't use min() */ #define VMAP_MAX(x, y) ((x) > (y) ? (x) : (y)) /* can't use max() */ -#define VMAP_BBMAP_BITS VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ - VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ - VMALLOC_PAGES / NR_CPUS / 16)) +#define VMAP_BBMAP_BITS \ + VMAP_MIN(VMAP_BBMAP_BITS_MAX, \ + VMAP_MAX(VMAP_BBMAP_BITS_MIN, \ + VMALLOC_PAGES / roundup_pow_of_two(NR_CPUS) / 16)) #define VMAP_BLOCK_SIZE (VMAP_BBMAP_BITS * PAGE_SIZE) -- cgit v1.2.3 From 93ee7a9340d64f20295aacc3fb6a22b759323280 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 14 Aug 2011 15:09:08 -0700 Subject: Linux 3.1-rc2 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index b4ca4e111c9a..3241d41dfbff 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc1 -NAME = Sneaky Weasel +EXTRAVERSION = -rc2 +NAME = Wet Seal # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.2.3 From c3c53a073247ee7522ca80393319540db9f4dc1e Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 15 Aug 2011 10:15:10 +0930 Subject: virtio: Add text copy of spec to Documentation/virtual. As suggested by Christoph Hellwig. Signed-off-by: Rusty Russell --- Documentation/virtual/00-INDEX | 3 + Documentation/virtual/virtio-spec.txt | 2200 +++++++++++++++++++++++++++++++++ 2 files changed, 2203 insertions(+) create mode 100644 Documentation/virtual/virtio-spec.txt diff --git a/Documentation/virtual/00-INDEX b/Documentation/virtual/00-INDEX index fe0251c4cfb7..8e601991d91c 100644 --- a/Documentation/virtual/00-INDEX +++ b/Documentation/virtual/00-INDEX @@ -8,3 +8,6 @@ lguest/ - Extremely simple hypervisor for experimental/educational use. uml/ - User Mode Linux, builds/runs Linux kernel as a userspace program. +virtio.txt + - Text version of draft virtio spec. + See http://ozlabs.org/~rusty/virtio-spec diff --git a/Documentation/virtual/virtio-spec.txt b/Documentation/virtual/virtio-spec.txt new file mode 100644 index 000000000000..a350ae135b8c --- /dev/null +++ b/Documentation/virtual/virtio-spec.txt @@ -0,0 +1,2200 @@ +[Generated file: see http://ozlabs.org/~rusty/virtio-spec/] +Virtio PCI Card Specification +v0.9.1 DRAFT +- + +Rusty Russell IBM Corporation (Editor) + +2011 August 1. + +Purpose and Description + +This document describes the specifications of the “virtio” family +of PCI[LaTeX Command: nomenclature] devices. These are devices +are found in virtual environments[LaTeX Command: nomenclature], +yet by design they are not all that different from physical PCI +devices, and this document treats them as such. This allows the +guest to use standard PCI drivers and discovery mechanisms. + +The purpose of virtio and this specification is that virtual +environments and guests should have a straightforward, efficient, +standard and extensible mechanism for virtual devices, rather +than boutique per-environment or per-OS mechanisms. + + Straightforward: Virtio PCI devices use normal PCI mechanisms + of interrupts and DMA which should be familiar to any device + driver author. There is no exotic page-flipping or COW + mechanism: it's just a PCI device.[footnote: +This lack of page-sharing implies that the implementation of the +device (e.g. the hypervisor or host) needs full access to the +guest memory. Communication with untrusted parties (i.e. +inter-guest communication) requires copying. +] + + Efficient: Virtio PCI devices consist of rings of descriptors + for input and output, which are neatly separated to avoid cache + effects from both guest and device writing to the same cache + lines. + + Standard: Virtio PCI makes no assumptions about the environment + in which it operates, beyond supporting PCI. In fact the virtio + devices specified in the appendices do not require PCI at all: + they have been implemented on non-PCI buses.[footnote: +The Linux implementation further separates the PCI virtio code +from the specific virtio drivers: these drivers are shared with +the non-PCI implementations (currently lguest and S/390). +] + + Extensible: Virtio PCI devices contain feature bits which are + acknowledged by the guest operating system during device setup. + This allows forwards and backwards compatibility: the device + offers all the features it knows about, and the driver + acknowledges those it understands and wishes to use. + + Virtqueues + +The mechanism for bulk data transport on virtio PCI devices is +pretentiously called a virtqueue. Each device can have zero or +more virtqueues: for example, the network device has one for +transmit and one for receive. + +Each virtqueue occupies two or more physically-contiguous pages +(defined, for the purposes of this specification, as 4096 bytes), +and consists of three parts: + + ++-------------------+-----------------------------------+-----------+ +| Descriptor Table | Available Ring (padding) | Used Ring | ++-------------------+-----------------------------------+-----------+ + + +When the driver wants to send buffers to the device, it puts them +in one or more slots in the descriptor table, and writes the +descriptor indices into the available ring. It then notifies the +device. When the device has finished with the buffers, it writes +the descriptors into the used ring, and sends an interrupt. + +Specification + + PCI Discovery + +Any PCI device with Vendor ID 0x1AF4, and Device ID 0x1000 +through 0x103F inclusive is a virtio device[footnote: +The actual value within this range is ignored +]. The device must also have a Revision ID of 0 to match this +specification. + +The Subsystem Device ID indicates which virtio device is +supported by the device. The Subsystem Vendor ID should reflect +the PCI Vendor ID of the environment (it's currently only used +for informational purposes by the guest). + + ++----------------------+--------------------+---------------+ +| Subsystem Device ID | Virtio Device | Specification | ++----------------------+--------------------+---------------+ ++----------------------+--------------------+---------------+ +| 1 | network card | Appendix C | ++----------------------+--------------------+---------------+ +| 2 | block device | Appendix D | ++----------------------+--------------------+---------------+ +| 3 | console | Appendix E | ++----------------------+--------------------+---------------+ +| 4 | entropy source | Appendix F | ++----------------------+--------------------+---------------+ +| 5 | memory ballooning | Appendix G | ++----------------------+--------------------+---------------+ +| 6 | ioMemory | - | ++----------------------+--------------------+---------------+ +| 9 | 9P transport | - | ++----------------------+--------------------+---------------+ + + + Device Configuration + +To configure the device, we use the first I/O region of the PCI +device. This contains a virtio header followed by a +device-specific region. + +There may be different widths of accesses to the I/O region; the “ +natural” access method for each field in the virtio header must +be used (i.e. 32-bit accesses for 32-bit fields, etc), but the +device-specific region can be accessed using any width accesses, +and should obtain the same results. + +Note that this is possible because while the virtio header is PCI +(i.e. little) endian, the device-specific region is encoded in +the native endian of the guest (where such distinction is +applicable). + + Device Initialization Sequence + +We start with an overview of device initialization, then expand +on the details of the device and how each step is preformed. + + Reset the device. This is not required on initial start up. + + The ACKNOWLEDGE status bit is set: we have noticed the device. + + The DRIVER status bit is set: we know how to drive the device. + + Device-specific setup, including reading the Device Feature + Bits, discovery of virtqueues for the device, optional MSI-X + setup, and reading and possibly writing the virtio + configuration space. + + The subset of Device Feature Bits understood by the driver is + written to the device. + + The DRIVER_OK status bit is set. + + The device can now be used (ie. buffers added to the + virtqueues)[footnote: +Historically, drivers have used the device before steps 5 and 6. +This is only allowed if the driver does not use any features +which would alter this early use of the device. +] + +If any of these steps go irrecoverably wrong, the guest should +set the FAILED status bit to indicate that it has given up on the +device (it can reset the device later to restart if desired). + +We now cover the fields required for general setup in detail. + + Virtio Header + +The virtio header looks as follows: + + ++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ +| Bits || 32 | 32 | 32 | 16 | 16 | 16 | 8 | 8 | ++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ +| Read/Write || R | R+W | R+W | R | R+W | R+W | R+W | R | ++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ +| Purpose || Device | Guest | Queue | Queue | Queue | Queue | Device | ISR | +| || Features bits 0:31 | Features bits 0:31 | Address | Size | Select | Notify | Status | Status | ++------------++---------------------+---------------------+----------+--------+---------+---------+---------+--------+ + + +If MSI-X is enabled for the device, two additional fields +immediately follow this header: + + ++------------++----------------+--------+ +| Bits || 16 | 16 | + +----------------+--------+ ++------------++----------------+--------+ +| Read/Write || R+W | R+W | ++------------++----------------+--------+ +| Purpose || Configuration | Queue | +| (MSI-X) || Vector | Vector | ++------------++----------------+--------+ + + +Finally, if feature bits (VIRTIO_F_FEATURES_HI) this is +immediately followed by two additional fields: + + ++------------++----------------------+---------------------- +| Bits || 32 | 32 ++------------++----------------------+---------------------- +| Read/Write || R | R+W ++------------++----------------------+---------------------- +| Purpose || Device | Guest +| || Features bits 32:63 | Features bits 32:63 ++------------++----------------------+---------------------- + + +Immediately following these general headers, there may be +device-specific headers: + + ++------------++--------------------+ +| Bits || Device Specific | + +--------------------+ ++------------++--------------------+ +| Read/Write || Device Specific | ++------------++--------------------+ +| Purpose || Device Specific... | +| || | ++------------++--------------------+ + + + Device Status + +The Device Status field is updated by the guest to indicate its +progress. This provides a simple low-level diagnostic: it's most +useful to imagine them hooked up to traffic lights on the console +indicating the status of each device. + +The device can be reset by writing a 0 to this field, otherwise +at least one bit should be set: + + ACKNOWLEDGE (1) Indicates that the guest OS has found the + device and recognized it as a valid virtio device. + + DRIVER (2) Indicates that the guest OS knows how to drive the + device. Under Linux, drivers can be loadable modules so there + may be a significant (or infinite) delay before setting this + bit. + + DRIVER_OK (3) Indicates that the driver is set up and ready to + drive the device. + + FAILED (8) Indicates that something went wrong in the guest, + and it has given up on the device. This could be an internal + error, or the driver didn't like the device for some reason, or + even a fatal error during device operation. The device must be + reset before attempting to re-initialize. + + Feature Bits + +The least significant 31 bits of the first configuration field +indicates the features that the device supports (the high bit is +reserved, and will be used to indicate the presence of future +feature bits elsewhere). If more than 31 feature bits are +supported, the device indicates so by setting feature bit 31 (see +[cha:Reserved-Feature-Bits]). The bits are allocated as follows: + + 0 to 23 Feature bits for the specific device type + + 24 to 40 Feature bits reserved for extensions to the queue and + feature negotiation mechanisms + + 41 to 63 Feature bits reserved for future extensions + +For example, feature bit 0 for a network device (i.e. Subsystem +Device ID 1) indicates that the device supports checksumming of +packets. + +The feature bits are negotiated: the device lists all the +features it understands in the Device Features field, and the +guest writes the subset that it understands into the Guest +Features field. The only way to renegotiate is to reset the +device. + +In particular, new fields in the device configuration header are +indicated by offering a feature bit, so the guest can check +before accessing that part of the configuration space. + +This allows for forwards and backwards compatibility: if the +device is enhanced with a new feature bit, older guests will not +write that feature bit back to the Guest Features field and it +can go into backwards compatibility mode. Similarly, if a guest +is enhanced with a feature that the device doesn't support, it +will not see that feature bit in the Device Features field and +can go into backwards compatibility mode (or, for poor +implementations, set the FAILED Device Status bit). + +Access to feature bits 32 to 63 is enabled by Guest by setting +feature bit 31. If this bit is unset, Device must assume that all +feature bits > 31 are unset. + + Configuration/Queue Vectors + +When MSI-X capability is present and enabled in the device +(through standard PCI configuration space) 4 bytes at byte offset +20 are used to map configuration change and queue interrupts to +MSI-X vectors. In this case, the ISR Status field is unused, and +device specific configuration starts at byte offset 24 in virtio +header structure. When MSI-X capability is not enabled, device +specific configuration starts at byte offset 20 in virtio header. + +Writing a valid MSI-X Table entry number, 0 to 0x7FF, to one of +Configuration/Queue Vector registers, maps interrupts triggered +by the configuration change/selected queue events respectively to +the corresponding MSI-X vector. To disable interrupts for a +specific event type, unmap it by writing a special NO_VECTOR +value: + +/* Vector value used to disable MSI for queue */ + +#define VIRTIO_MSI_NO_VECTOR 0xffff + +Reading these registers returns vector mapped to a given event, +or NO_VECTOR if unmapped. All queue and configuration change +events are unmapped by default. + +Note that mapping an event to vector might require allocating +internal device resources, and might fail. Devices report such +failures by returning the NO_VECTOR value when the relevant +Vector field is read. After mapping an event to vector, the +driver must verify success by reading the Vector field value: on +success, the previously written value is returned, and on +failure, NO_VECTOR is returned. If a mapping failure is detected, +the driver can retry mapping with fewervectors, or disable MSI-X. + + Virtqueue Configuration + +As a device can have zero or more virtqueues for bulk data +transport (for example, the network driver has two), the driver +needs to configure them as part of the device-specific +configuration. + +This is done as follows, for each virtqueue a device has: + + Write the virtqueue index (first queue is 0) to the Queue + Select field. + + Read the virtqueue size from the Queue Size field, which is + always a power of 2. This controls how big the virtqueue is + (see below). If this field is 0, the virtqueue does not exist. + + Allocate and zero virtqueue in contiguous physical memory, on a + 4096 byte alignment. Write the physical address, divided by + 4096 to the Queue Address field.[footnote: +The 4096 is based on the x86 page size, but it's also large +enough to ensure that the separate parts of the virtqueue are on +separate cache lines. +] + + Optionally, if MSI-X capability is present and enabled on the + device, select a vector to use to request interrupts triggered + by virtqueue events. Write the MSI-X Table entry number + corresponding to this vector in Queue Vector field. Read the + Queue Vector field: on success, previously written value is + returned; on failure, NO_VECTOR value is returned. + +The Queue Size field controls the total number of bytes required +for the virtqueue according to the following formula: + +#define ALIGN(x) (((x) + 4095) & ~4095) + +static inline unsigned vring_size(unsigned int qsz) + +{ + + return ALIGN(sizeof(struct vring_desc)*qsz + sizeof(u16)*(2 ++ qsz)) + + + ALIGN(sizeof(struct vring_used_elem)*qsz); + +} + +This currently wastes some space with padding, but also allows +future extensions. The virtqueue layout structure looks like this +(qsz is the Queue Size field, which is a variable, so this code +won't compile): + +struct vring { + + /* The actual descriptors (16 bytes each) */ + + struct vring_desc desc[qsz]; + + + + /* A ring of available descriptor heads with free-running +index. */ + + struct vring_avail avail; + + + + // Padding to the next 4096 boundary. + + char pad[]; + + + + // A ring of used descriptor heads with free-running index. + + struct vring_used used; + +}; + + A Note on Virtqueue Endianness + +Note that the endian of these fields and everything else in the +virtqueue is the native endian of the guest, not little-endian as +PCI normally is. This makes for simpler guest code, and it is +assumed that the host already has to be deeply aware of the guest +endian so such an “endian-aware” device is not a significant +issue. + + Descriptor Table + +The descriptor table refers to the buffers the guest is using for +the device. The addresses are physical addresses, and the buffers +can be chained via the next field. Each descriptor describes a +buffer which is read-only or write-only, but a chain of +descriptors can contain both read-only and write-only buffers. + +No descriptor chain may be more than 2^32 bytes long in total.struct vring_desc { + + /* Address (guest-physical). */ + + u64 addr; + + /* Length. */ + + u32 len; + +/* This marks a buffer as continuing via the next field. */ + +#define VRING_DESC_F_NEXT 1 + +/* This marks a buffer as write-only (otherwise read-only). */ + +#define VRING_DESC_F_WRITE 2 + +/* This means the buffer contains a list of buffer descriptors. +*/ + +#define VRING_DESC_F_INDIRECT 4 + + /* The flags as indicated above. */ + + u16 flags; + + /* Next field if flags & NEXT */ + + u16 next; + +}; + +The number of descriptors in the table is specified by the Queue +Size field for this virtqueue. + + Indirect Descriptors + +Some devices benefit by concurrently dispatching a large number +of large requests. The VIRTIO_RING_F_INDIRECT_DESC feature can be +used to allow this (see [cha:Reserved-Feature-Bits]). To increase +ring capacity it is possible to store a table of indirect +descriptors anywhere in memory, and insert a descriptor in main +virtqueue (with flags&INDIRECT on) that refers to memory buffer +containing this indirect descriptor table; fields addr and len +refer to the indirect table address and length in bytes, +respectively. The indirect table layout structure looks like this +(len is the length of the descriptor that refers to this table, +which is a variable, so this code won't compile): + +struct indirect_descriptor_table { + + /* The actual descriptors (16 bytes each) */ + + struct vring_desc desc[len / 16]; + +}; + +The first indirect descriptor is located at start of the indirect +descriptor table (index 0), additional indirect descriptors are +chained by next field. An indirect descriptor without next field +(with flags&NEXT off) signals the end of the indirect descriptor +table, and transfers control back to the main virtqueue. An +indirect descriptor can not refer to another indirect descriptor +table (flags&INDIRECT must be off). A single indirect descriptor +table can include both read-only and write-only descriptors; +write-only flag (flags&WRITE) in the descriptor that refers to it +is ignored. + + Available Ring + +The available ring refers to what descriptors we are offering the +device: it refers to the head of a descriptor chain. The “flags” +field is currently 0 or 1: 1 indicating that we do not need an +interrupt when the device consumes a descriptor from the +available ring. Alternatively, the guest can ask the device to +delay interrupts until an entry with an index specified by the “ +used_event” field is written in the used ring (equivalently, +until the idx field in the used ring will reach the value +used_event + 1). The method employed by the device is controlled +by the VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits] +). This interrupt suppression is merely an optimization; it may +not suppress interrupts entirely. + +The “idx” field indicates where we would put the next descriptor +entry (modulo the ring size). This starts at 0, and increases. + +struct vring_avail { + +#define VRING_AVAIL_F_NO_INTERRUPT 1 + + u16 flags; + + u16 idx; + + u16 ring[qsz]; /* qsz is the Queue Size field read from device +*/ + + u16 used_event; + +}; + + Used Ring + +The used ring is where the device returns buffers once it is done +with them. The flags field can be used by the device to hint that +no notification is necessary when the guest adds to the available +ring. Alternatively, the “avail_event” field can be used by the +device to hint that no notification is necessary until an entry +with an index specified by the “avail_event” is written in the +available ring (equivalently, until the idx field in the +available ring will reach the value avail_event + 1). The method +employed by the device is controlled by the guest through the +VIRTIO_RING_F_EVENT_IDX feature bit (see [cha:Reserved-Feature-Bits] +). [footnote: +These fields are kept here because this is the only part of the +virtqueue written by the device +]. + +Each entry in the ring is a pair: the head entry of the +descriptor chain describing the buffer (this matches an entry +placed in the available ring by the guest earlier), and the total +of bytes written into the buffer. The latter is extremely useful +for guests using untrusted buffers: if you do not know exactly +how much has been written by the device, you usually have to zero +the buffer to ensure no data leakage occurs. + +/* u32 is used here for ids for padding reasons. */ + +struct vring_used_elem { + + /* Index of start of used descriptor chain. */ + + u32 id; + + /* Total length of the descriptor chain which was used +(written to) */ + + u32 len; + +}; + + + +struct vring_used { + +#define VRING_USED_F_NO_NOTIFY 1 + + u16 flags; + + u16 idx; + + struct vring_used_elem ring[qsz]; + + u16 avail_event; + +}; + + Helpers for Managing Virtqueues + +The Linux Kernel Source code contains the definitions above and +helper routines in a more usable form, in +include/linux/virtio_ring.h. This was explicitly licensed by IBM +and Red Hat under the (3-clause) BSD license so that it can be +freely used by all other projects, and is reproduced (with slight +variation to remove Linux assumptions) in Appendix A. + + Device Operation + +There are two parts to device operation: supplying new buffers to +the device, and processing used buffers from the device. As an +example, the virtio network device has two virtqueues: the +transmit virtqueue and the receive virtqueue. The driver adds +outgoing (read-only) packets to the transmit virtqueue, and then +frees them after they are used. Similarly, incoming (write-only) +buffers are added to the receive virtqueue, and processed after +they are used. + + Supplying Buffers to The Device + +Actual transfer of buffers from the guest OS to the device +operates as follows: + + Place the buffer(s) into free descriptor(s). + + If there are no free descriptors, the guest may choose to + notify the device even if notifications are suppressed (to + reduce latency).[footnote: +The Linux drivers do this only for read-only buffers: for +write-only buffers, it is assumed that the driver is merely +trying to keep the receive buffer ring full, and no notification +of this expected condition is necessary. +] + + Place the id of the buffer in the next ring entry of the + available ring. + + The steps (1) and (2) may be performed repeatedly if batching + is possible. + + A memory barrier should be executed to ensure the device sees + the updated descriptor table and available ring before the next + step. + + The available “idx” field should be increased by the number of + entries added to the available ring. + + A memory barrier should be executed to ensure that we update + the idx field before checking for notification suppression. + + If notifications are not suppressed, the device should be + notified of the new buffers. + +Note that the above code does not take precautions against the +available ring buffer wrapping around: this is not possible since +the ring buffer is the same size as the descriptor table, so step +(1) will prevent such a condition. + +In addition, the maximum queue size is 32768 (it must be a power +of 2 which fits in 16 bits), so the 16-bit “idx” value can always +distinguish between a full and empty buffer. + +Here is a description of each stage in more detail. + + Placing Buffers Into The Descriptor Table + +A buffer consists of zero or more read-only physically-contiguous +elements followed by zero or more physically-contiguous +write-only elements (it must have at least one element). This +algorithm maps it into the descriptor table: + + for each buffer element, b: + + Get the next free descriptor table entry, d + + Set d.addr to the physical address of the start of b + + Set d.len to the length of b. + + If b is write-only, set d.flags to VRING_DESC_F_WRITE, + otherwise 0. + + If there is a buffer element after this: + + Set d.next to the index of the next free descriptor element. + + Set the VRING_DESC_F_NEXT bit in d.flags. + +In practice, the d.next fields are usually used to chain free +descriptors, and a separate count kept to check there are enough +free descriptors before beginning the mappings. + + Updating The Available Ring + +The head of the buffer we mapped is the first d in the algorithm +above. A naive implementation would do the following: + +avail->ring[avail->idx % qsz] = head; + +However, in general we can add many descriptors before we update +the “idx” field (at which point they become visible to the +device), so we keep a counter of how many we've added: + +avail->ring[(avail->idx + added++) % qsz] = head; + + Updating The Index Field + +Once the idx field of the virtqueue is updated, the device will +be able to access the descriptor entries we've created and the +memory they refer to. This is why a memory barrier is generally +used before the idx update, to ensure it sees the most up-to-date +copy. + +The idx field always increments, and we let it wrap naturally at +65536: + +avail->idx += added; + + Notifying The Device + +Device notification occurs by writing the 16-bit virtqueue index +of this virtqueue to the Queue Notify field of the virtio header +in the first I/O region of the PCI device. This can be expensive, +however, so the device can suppress such notifications if it +doesn't need them. We have to be careful to expose the new idx +value before checking the suppression flag: it's OK to notify +gratuitously, but not to omit a required notification. So again, +we use a memory barrier here before reading the flags or the +avail_event field. + +If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated, and if +the VRING_USED_F_NOTIFY flag is not set, we go ahead and write to +the PCI configuration space. + +If the VIRTIO_F_RING_EVENT_IDX feature is negotiated, we read the +avail_event field in the available ring structure. If the +available index crossed_the avail_event field value since the +last notification, we go ahead and write to the PCI configuration +space. The avail_event field wraps naturally at 65536 as well: + +(u16)(new_idx - avail_event - 1) < (u16)(new_idx - old_idx) + + Receiving Used Buffers From The + Device + +Once the device has used a buffer (read from or written to it, or +parts of both, depending on the nature of the virtqueue and the +device), it sends an interrupt, following an algorithm very +similar to the algorithm used for the driver to send the device a +buffer: + + Write the head descriptor number to the next field in the used + ring. + + Update the used ring idx. + + Determine whether an interrupt is necessary: + + If the VIRTIO_F_RING_EVENT_IDX feature is not negotiated: check + if f the VRING_AVAIL_F_NO_INTERRUPT flag is not set in avail- + >flags + + If the VIRTIO_F_RING_EVENT_IDX feature is negotiated: check + whether the used index crossed the used_event field value + since the last update. The used_event field wraps naturally + at 65536 as well:(u16)(new_idx - used_event - 1) < (u16)(new_idx - old_idx) + + If an interrupt is necessary: + + If MSI-X capability is disabled: + + Set the lower bit of the ISR Status field for the device. + + Send the appropriate PCI interrupt for the device. + + If MSI-X capability is enabled: + + Request the appropriate MSI-X interrupt message for the + device, Queue Vector field sets the MSI-X Table entry + number. + + If Queue Vector field value is NO_VECTOR, no interrupt + message is requested for this event. + +The guest interrupt handler should: + + If MSI-X capability is disabled: read the ISR Status field, + which will reset it to zero. If the lower bit is zero, the + interrupt was not for this device. Otherwise, the guest driver + should look through the used rings of each virtqueue for the + device, to see if any progress has been made by the device + which requires servicing. + + If MSI-X capability is enabled: look through the used rings of + each virtqueue mapped to the specific MSI-X vector for the + device, to see if any progress has been made by the device + which requires servicing. + +For each ring, guest should then disable interrupts by writing +VRING_AVAIL_F_NO_INTERRUPT flag in avail structure, if required. +It can then process used ring entries finally enabling interrupts +by clearing the VRING_AVAIL_F_NO_INTERRUPT flag or updating the +EVENT_IDX field in the available structure, Guest should then +execute a memory barrier, and then recheck the ring empty +condition. This is necessary to handle the case where, after the +last check and before enabling interrupts, an interrupt has been +suppressed by the device: + +vring_disable_interrupts(vq); + +for (;;) { + + if (vq->last_seen_used != vring->used.idx) { + + vring_enable_interrupts(vq); + + mb(); + + if (vq->last_seen_used != vring->used.idx) + + break; + + } + + struct vring_used_elem *e = +vring.used->ring[vq->last_seen_used%vsz]; + + process_buffer(e); + + vq->last_seen_used++; + +} + + Dealing With Configuration Changes + +Some virtio PCI devices can change the device configuration +state, as reflected in the virtio header in the PCI configuration +space. In this case: + + If MSI-X capability is disabled: an interrupt is delivered and + the second highest bit is set in the ISR Status field to + indicate that the driver should re-examine the configuration + space.Note that a single interrupt can indicate both that one + or more virtqueue has been used and that the configuration + space has changed: even if the config bit is set, virtqueues + must be scanned. + + If MSI-X capability is enabled: an interrupt message is + requested. The Configuration Vector field sets the MSI-X Table + entry number to use. If Configuration Vector field value is + NO_VECTOR, no interrupt message is requested for this event. + +Creating New Device Types + +Various considerations are necessary when creating a new device +type: + + How Many Virtqueues? + +It is possible that a very simple device will operate entirely +through its configuration space, but most will need at least one +virtqueue in which it will place requests. A device with both +input and output (eg. console and network devices described here) +need two queues: one which the driver fills with buffers to +receive input, and one which the driver places buffers to +transmit output. + + What Configuration Space Layout? + +Configuration space is generally used for rarely-changing or +initialization-time parameters. But it is a limited resource, so +it might be better to use a virtqueue to update configuration +information (the network device does this for filtering, +otherwise the table in the config space could potentially be very +large). + +Note that this space is generally the guest's native endian, +rather than PCI's little-endian. + + What Device Number? + +Currently device numbers are assigned quite freely: a simple +request mail to the author of this document or the Linux +virtualization mailing list[footnote: + +https://lists.linux-foundation.org/mailman/listinfo/virtualization +] will be sufficient to secure a unique one. + +Meanwhile for experimental drivers, use 65535 and work backwards. + + How many MSI-X vectors? + +Using the optional MSI-X capability devices can speed up +interrupt processing by removing the need to read ISR Status +register by guest driver (which might be an expensive operation), +reducing interrupt sharing between devices and queues within the +device, and handling interrupts from multiple CPUs. However, some +systems impose a limit (which might be as low as 256) on the +total number of MSI-X vectors that can be allocated to all +devices. Devices and/or device drivers should take this into +account, limiting the number of vectors used unless the device is +expected to cause a high volume of interrupts. Devices can +control the number of vectors used by limiting the MSI-X Table +Size or not presenting MSI-X capability in PCI configuration +space. Drivers can control this by mapping events to as small +number of vectors as possible, or disabling MSI-X capability +altogether. + + Message Framing + +The descriptors used for a buffer should not effect the semantics +of the message, except for the total length of the buffer. For +example, a network buffer consists of a 10 byte header followed +by the network packet. Whether this is presented in the ring +descriptor chain as (say) a 10 byte buffer and a 1514 byte +buffer, or a single 1524 byte buffer, or even three buffers, +should have no effect. + +In particular, no implementation should use the descriptor +boundaries to determine the size of any header in a request.[footnote: +The current qemu device implementations mistakenly insist that +the first descriptor cover the header in these cases exactly, so +a cautious driver should arrange it so. +] + + Device Improvements + +Any change to configuration space, or new virtqueues, or +behavioural changes, should be indicated by negotiation of a new +feature bit. This establishes clarity[footnote: +Even if it does mean documenting design or implementation +mistakes! +] and avoids future expansion problems. + +Clusters of functionality which are always implemented together +can use a single bit, but if one feature makes sense without the +others they should not be gratuitously grouped together to +conserve feature bits. We can always extend the spec when the +first person needs more than 24 feature bits for their device. + +[LaTeX Command: printnomenclature] + +Appendix A: virtio_ring.h + +#ifndef VIRTIO_RING_H + +#define VIRTIO_RING_H + +/* An interface for efficient virtio implementation. + + * + + * This header is BSD licensed so anyone can use the definitions + + * to implement compatible drivers/servers. + + * + + * Copyright 2007, 2009, IBM Corporation + + * Copyright 2011, Red Hat, Inc + + * All rights reserved. + + * + + * Redistribution and use in source and binary forms, with or +without + + * modification, are permitted provided that the following +conditions + + * are met: + + * 1. Redistributions of source code must retain the above +copyright + + * notice, this list of conditions and the following +disclaimer. + + * 2. Redistributions in binary form must reproduce the above +copyright + + * notice, this list of conditions and the following +disclaimer in the + + * documentation and/or other materials provided with the +distribution. + + * 3. Neither the name of IBM nor the names of its contributors + + * may be used to endorse or promote products derived from +this software + + * without specific prior written permission. + + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS ``AS IS'' AND + + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE + + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE + + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE +LIABLE + + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL + + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS + + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) + + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT + + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY + + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF + + * SUCH DAMAGE. + + */ + + + +/* This marks a buffer as continuing via the next field. */ + +#define VRING_DESC_F_NEXT 1 + +/* This marks a buffer as write-only (otherwise read-only). */ + +#define VRING_DESC_F_WRITE 2 + + + +/* The Host uses this in used->flags to advise the Guest: don't +kick me + + * when you add a buffer. It's unreliable, so it's simply an + + * optimization. Guest will still kick if it's out of buffers. +*/ + +#define VRING_USED_F_NO_NOTIFY 1 + +/* The Guest uses this in avail->flags to advise the Host: don't + + * interrupt me when you consume a buffer. It's unreliable, so +it's + + * simply an optimization. */ + +#define VRING_AVAIL_F_NO_INTERRUPT 1 + + + +/* Virtio ring descriptors: 16 bytes. + + * These can chain together via "next". */ + +struct vring_desc { + + /* Address (guest-physical). */ + + uint64_t addr; + + /* Length. */ + + uint32_t len; + + /* The flags as indicated above. */ + + uint16_t flags; + + /* We chain unused descriptors via this, too */ + + uint16_t next; + +}; + + + +struct vring_avail { + + uint16_t flags; + + uint16_t idx; + + uint16_t ring[]; + + uint16_t used_event; + +}; + + + +/* u32 is used here for ids for padding reasons. */ + +struct vring_used_elem { + + /* Index of start of used descriptor chain. */ + + uint32_t id; + + /* Total length of the descriptor chain which was written +to. */ + + uint32_t len; + +}; + + + +struct vring_used { + + uint16_t flags; + + uint16_t idx; + + struct vring_used_elem ring[]; + + uint16_t avail_event; + +}; + + + +struct vring { + + unsigned int num; + + + + struct vring_desc *desc; + + struct vring_avail *avail; + + struct vring_used *used; + +}; + + + +/* The standard layout for the ring is a continuous chunk of +memory which + + * looks like this. We assume num is a power of 2. + + * + + * struct vring { + + * // The actual descriptors (16 bytes each) + + * struct vring_desc desc[num]; + + * + + * // A ring of available descriptor heads with free-running +index. + + * __u16 avail_flags; + + * __u16 avail_idx; + + * __u16 available[num]; + + * + + * // Padding to the next align boundary. + + * char pad[]; + + * + + * // A ring of used descriptor heads with free-running +index. + + * __u16 used_flags; + + * __u16 EVENT_IDX; + + * struct vring_used_elem used[num]; + + * }; + + * Note: for virtio PCI, align is 4096. + + */ + +static inline void vring_init(struct vring *vr, unsigned int num, +void *p, + + unsigned long align) + +{ + + vr->num = num; + + vr->desc = p; + + vr->avail = p + num*sizeof(struct vring_desc); + + vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + + + align-1) + + & ~(align - 1)); + +} + + + +static inline unsigned vring_size(unsigned int num, unsigned long +align) + +{ + + return ((sizeof(struct vring_desc)*num + +sizeof(uint16_t)*(2+num) + + + align - 1) & ~(align - 1)) + + + sizeof(uint16_t)*3 + sizeof(struct +vring_used_elem)*num; + +} + + + +static inline int vring_need_event(uint16_t event_idx, uint16_t +new_idx, uint16_t old_idx) + +{ + + return (uint16_t)(new_idx - event_idx - 1) < +(uint16_t)(new_idx - old_idx); + +} + +#endif /* VIRTIO_RING_H */ + +Appendix B: Reserved Feature Bits + +Currently there are five device-independent feature bits defined: + + VIRTIO_F_NOTIFY_ON_EMPTY (24) Negotiating this feature + indicates that the driver wants an interrupt if the device runs + out of available descriptors on a virtqueue, even though + interrupts are suppressed using the VRING_AVAIL_F_NO_INTERRUPT + flag or the used_event field. An example of this is the + networking driver: it doesn't need to know every time a packet + is transmitted, but it does need to free the transmitted + packets a finite time after they are transmitted. It can avoid + using a timer if the device interrupts it when all the packets + are transmitted. + + VIRTIO_F_RING_INDIRECT_DESC (28) Negotiating this feature + indicates that the driver can use descriptors with the + VRING_DESC_F_INDIRECT flag set, as described in [sub:Indirect-Descriptors] + . + + VIRTIO_F_RING_EVENT_IDX(29) This feature enables the used_event + and the avail_event fields. If set, it indicates that the + device should ignore the flags field in the available ring + structure. Instead, the used_event field in this structure is + used by guest to suppress device interrupts. Further, the + driver should ignore the flags field in the used ring + structure. Instead, the avail_event field in this structure is + used by the device to suppress notifications. If unset, the + driver should ignore the used_event field; the device should + ignore the avail_event field; the flags field is used + + VIRTIO_F_BAD_FEATURE(30) This feature should never be + negotiated by the guest; doing so is an indication that the + guest is faulty[footnote: +An experimental virtio PCI driver contained in Linux version +2.6.25 had this problem, and this feature bit can be used to +detect it. +] + + VIRTIO_F_FEATURES_HIGH(31) This feature indicates that the + device supports feature bits 32:63. If unset, feature bits + 32:63 are unset. + +Appendix C: Network Device + +The virtio network device is a virtual ethernet card, and is the +most complex of the devices supported so far by virtio. It has +enhanced rapidly and demonstrates clearly how support for new +features should be added to an existing device. Empty buffers are +placed in one virtqueue for receiving packets, and outgoing +packets are enqueued into another for transmission in that order. +A third command queue is used to control advanced filtering +features. + + Configuration + + Subsystem Device ID 1 + + Virtqueues 0:receiveq. 1:transmitq. 2:controlq[footnote: +Only if VIRTIO_NET_F_CTRL_VQ set +] + + Feature bits + + VIRTIO_NET_F_CSUM (0) Device handles packets with partial + checksum + + VIRTIO_NET_F_GUEST_CSUM (1) Guest handles packets with partial + checksum + + VIRTIO_NET_F_MAC (5) Device has given MAC address. + + VIRTIO_NET_F_GSO (6) (Deprecated) device handles packets with + any GSO type.[footnote: +It was supposed to indicate segmentation offload support, but +upon further investigation it became clear that multiple bits +were required. +] + + VIRTIO_NET_F_GUEST_TSO4 (7) Guest can receive TSOv4. + + VIRTIO_NET_F_GUEST_TSO6 (8) Guest can receive TSOv6. + + VIRTIO_NET_F_GUEST_ECN (9) Guest can receive TSO with ECN. + + VIRTIO_NET_F_GUEST_UFO (10) Guest can receive UFO. + + VIRTIO_NET_F_HOST_TSO4 (11) Device can receive TSOv4. + + VIRTIO_NET_F_HOST_TSO6 (12) Device can receive TSOv6. + + VIRTIO_NET_F_HOST_ECN (13) Device can receive TSO with ECN. + + VIRTIO_NET_F_HOST_UFO (14) Device can receive UFO. + + VIRTIO_NET_F_MRG_RXBUF (15) Guest can merge receive buffers. + + VIRTIO_NET_F_STATUS (16) Configuration status field is + available. + + VIRTIO_NET_F_CTRL_VQ (17) Control channel is available. + + VIRTIO_NET_F_CTRL_RX (18) Control channel RX mode support. + + VIRTIO_NET_F_CTRL_VLAN (19) Control channel VLAN filtering. + + Device configuration layout Two configuration fields are + currently defined. The mac address field always exists (though + is only valid if VIRTIO_NET_F_MAC is set), and the status field + only exists if VIRTIO_NET_F_STATUS is set. Only one bit is + currently defined for the status field: VIRTIO_NET_S_LINK_UP. #define VIRTIO_NET_S_LINK_UP 1 + + + +struct virtio_net_config { + + u8 mac[6]; + + u16 status; + +}; + + Device Initialization + + The initialization routine should identify the receive and + transmission virtqueues. + + If the VIRTIO_NET_F_MAC feature bit is set, the configuration + space “mac” entry indicates the “physical” address of the the + network card, otherwise a private MAC address should be + assigned. All guests are expected to negotiate this feature if + it is set. + + If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, identify + the control virtqueue. + + If the VIRTIO_NET_F_STATUS feature bit is negotiated, the link + status can be read from the bottom bit of the “status” config + field. Otherwise, the link should be assumed active. + + The receive virtqueue should be filled with receive buffers. + This is described in detail below in “Setting Up Receive + Buffers”. + + A driver can indicate that it will generate checksumless + packets by negotating the VIRTIO_NET_F_CSUM feature. This “ + checksum offload” is a common feature on modern network cards. + + If that feature is negotiated, a driver can use TCP or UDP + segmentation offload by negotiating the VIRTIO_NET_F_HOST_TSO4 + (IPv4 TCP), VIRTIO_NET_F_HOST_TSO6 (IPv6 TCP) and + VIRTIO_NET_F_HOST_UFO (UDP fragmentation) features. It should + not send TCP packets requiring segmentation offload which have + the Explicit Congestion Notification bit set, unless the + VIRTIO_NET_F_HOST_ECN feature is negotiated.[footnote: +This is a common restriction in real, older network cards. +] + + The converse features are also available: a driver can save the + virtual device some work by negotiating these features.[footnote: +For example, a network packet transported between two guests on +the same system may not require checksumming at all, nor +segmentation, if both guests are amenable. +] The VIRTIO_NET_F_GUEST_CSUM feature indicates that partially + checksummed packets can be received, and if it can do that then + the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, + VIRTIO_NET_F_GUEST_UFO and VIRTIO_NET_F_GUEST_ECN are the input + equivalents of the features described above. See “Receiving + Packets” below. + + Device Operation + +Packets are transmitted by placing them in the transmitq, and +buffers for incoming packets are placed in the receiveq. In each +case, the packet itself is preceeded by a header: + +struct virtio_net_hdr { + +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 + + u8 flags; + +#define VIRTIO_NET_HDR_GSO_NONE 0 + +#define VIRTIO_NET_HDR_GSO_TCPV4 1 + +#define VIRTIO_NET_HDR_GSO_UDP 3 + +#define VIRTIO_NET_HDR_GSO_TCPV6 4 + +#define VIRTIO_NET_HDR_GSO_ECN 0x80 + + u8 gso_type; + + u16 hdr_len; + + u16 gso_size; + + u16 csum_start; + + u16 csum_offset; + +/* Only if VIRTIO_NET_F_MRG_RXBUF: */ + + u16 num_buffers + +}; + +The controlq is used to control device features such as +filtering. + + Packet Transmission + +Transmitting a single packet is simple, but varies depending on +the different features the driver negotiated. + + If the driver negotiated VIRTIO_NET_F_CSUM, and the packet has + not been fully checksummed, then the virtio_net_hdr's fields + are set as follows. Otherwise, the packet must be fully + checksummed, and flags is zero. + + flags has the VIRTIO_NET_HDR_F_NEEDS_CSUM set, + + csum_start is set to the offset within + the packet to begin checksumming, and + + csum_offset indicates how many bytes after the csum_start the + new (16 bit ones' complement) checksum should be placed.[footnote: +For example, consider a partially checksummed TCP (IPv4) packet. +It will have a 14 byte ethernet header and 20 byte IP header +followed by the TCP header (with the TCP checksum field 16 bytes +into that header). csum_start will be 14+20 = 34 (the TCP +checksum includes the header), and csum_offset will be 16. The +value in the TCP checksum field will be the sum of the TCP pseudo +header, so that replacing it by the ones' complement checksum of +the TCP header and body will give the correct result. +] + + If the driver negotiated + VIRTIO_NET_F_HOST_TSO4, TSO6 or UFO, and the packet requires + TCP segmentation or UDP fragmentation, then the “gso_type” + field is set to VIRTIO_NET_HDR_GSO_TCPV4, TCPV6 or UDP. + (Otherwise, it is set to VIRTIO_NET_HDR_GSO_NONE). In this + case, packets larger than 1514 bytes can be transmitted: the + metadata indicates how to replicate the packet header to cut it + into smaller packets. The other gso fields are set: + + hdr_len is a hint to the device as to how much of the header + needs to be kept to copy into each packet, usually set to the + length of the headers, including the transport header.[footnote: +Due to various bugs in implementations, this field is not useful +as a guarantee of the transport header size. +] + + gso_size is the size of the packet beyond that header (ie. + MSS). + + If the driver negotiated the VIRTIO_NET_F_HOST_ECN feature, the + VIRTIO_NET_HDR_GSO_ECN bit may be set in “gso_type” as well, + indicating that the TCP packet has the ECN bit set.[footnote: +This case is not handled by some older hardware, so is called out +specifically in the protocol. +] + + If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature, + the num_buffers field is set to zero. + + The header and packet are added as one output buffer to the + transmitq, and the device is notified of the new entry (see [sub:Notifying-The-Device] + ).[footnote: +Note that the header will be two bytes longer for the +VIRTIO_NET_F_MRG_RXBUF case. +] + + Packet Transmission Interrupt + +Often a driver will suppress transmission interrupts using the +VRING_AVAIL_F_NO_INTERRUPT flag (see [sub:Receiving-Used-Buffers] +) and check for used packets in the transmit path of following +packets. However, it will still receive interrupts if the +VIRTIO_F_NOTIFY_ON_EMPTY feature is negotiated, indicating that +the transmission queue is completely emptied. + +The normal behavior in this interrupt handler is to retrieve and +new descriptors from the used ring and free the corresponding +headers and packets. + + Setting Up Receive Buffers + +It is generally a good idea to keep the receive virtqueue as +fully populated as possible: if it runs out, network performance +will suffer. + +If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or +VIRTIO_NET_F_GUEST_UFO features are used, the Guest will need to +accept packets of up to 65550 bytes long (the maximum size of a +TCP or UDP packet, plus the 14 byte ethernet header), otherwise +1514 bytes. So unless VIRTIO_NET_F_MRG_RXBUF is negotiated, every +buffer in the receive queue needs to be at least this length [footnote: +Obviously each one can be split across multiple descriptor +elements. +]. + +If VIRTIO_NET_F_MRG_RXBUF is negotiated, each buffer must be at +least the size of the struct virtio_net_hdr. + + Packet Receive Interrupt + +When a packet is copied into a buffer in the receiveq, the +optimal path is to disable further interrupts for the receiveq +(see [sub:Receiving-Used-Buffers]) and process packets until no +more are found, then re-enable them. + +Processing packet involves: + + If the driver negotiated the VIRTIO_NET_F_MRG_RXBUF feature, + then the “num_buffers” field indicates how many descriptors + this packet is spread over (including this one). This allows + receipt of large packets without having to allocate large + buffers. In this case, there will be at least “num_buffers” in + the used ring, and they should be chained together to form a + single packet. The other buffers will not begin with a struct + virtio_net_hdr. + + If the VIRTIO_NET_F_MRG_RXBUF feature was not negotiated, or + the “num_buffers” field is one, then the entire packet will be + contained within this buffer, immediately following the struct + virtio_net_hdr. + + If the VIRTIO_NET_F_GUEST_CSUM feature was negotiated, the + VIRTIO_NET_HDR_F_NEEDS_CSUM bit in the “flags” field may be + set: if so, the checksum on the packet is incomplete and the “ + csum_start” and “csum_offset” fields indicate how to calculate + it (see [ite:csum_start-is-set]). + + If the VIRTIO_NET_F_GUEST_TSO4, TSO6 or UFO options were + negotiated, then the “gso_type” may be something other than + VIRTIO_NET_HDR_GSO_NONE, and the “gso_size” field indicates the + desired MSS (see [enu:If-the-driver]).Control Virtqueue + +The driver uses the control virtqueue (if VIRTIO_NET_F_VTRL_VQ is +negotiated) to send commands to manipulate various features of +the device which would not easily map into the configuration +space. + +All commands are of the following form: + +struct virtio_net_ctrl { + + u8 class; + + u8 command; + + u8 command-specific-data[]; + + u8 ack; + +}; + + + +/* ack values */ + +#define VIRTIO_NET_OK 0 + +#define VIRTIO_NET_ERR 1 + +The class, command and command-specific-data are set by the +driver, and the device sets the ack byte. There is little it can +do except issue a diagnostic if the ack byte is not +VIRTIO_NET_OK. + + Packet Receive Filtering + +If the VIRTIO_NET_F_CTRL_RX feature is negotiated, the driver can +send control commands for promiscuous mode, multicast receiving, +and filtering of MAC addresses. + +Note that in general, these commands are best-effort: unwanted +packets may still arrive. + + Setting Promiscuous Mode + +#define VIRTIO_NET_CTRL_RX 0 + + #define VIRTIO_NET_CTRL_RX_PROMISC 0 + + #define VIRTIO_NET_CTRL_RX_ALLMULTI 1 + +The class VIRTIO_NET_CTRL_RX has two commands: +VIRTIO_NET_CTRL_RX_PROMISC turns promiscuous mode on and off, and +VIRTIO_NET_CTRL_RX_ALLMULTI turns all-multicast receive on and +off. The command-specific-data is one byte containing 0 (off) or +1 (on). + + Setting MAC Address Filtering + +struct virtio_net_ctrl_mac { + + u32 entries; + + u8 macs[entries][ETH_ALEN]; + +}; + + + +#define VIRTIO_NET_CTRL_MAC 1 + + #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + +The device can filter incoming packets by any number of +destination MAC addresses.[footnote: +Since there are no guarentees, it can use a hash filter +orsilently switch to allmulti or promiscuous mode if it is given +too many addresses. +] This table is set using the class VIRTIO_NET_CTRL_MAC and the +command VIRTIO_NET_CTRL_MAC_TABLE_SET. The command-specific-data +is two variable length tables of 6-byte MAC addresses. The first +table contains unicast addresses, and the second contains +multicast addresses. + + VLAN Filtering + +If the driver negotiates the VIRTION_NET_F_CTRL_VLAN feature, it +can control a VLAN filter table in the device. + +#define VIRTIO_NET_CTRL_VLAN 2 + + #define VIRTIO_NET_CTRL_VLAN_ADD 0 + + #define VIRTIO_NET_CTRL_VLAN_DEL 1 + +Both the VIRTIO_NET_CTRL_VLAN_ADD and VIRTIO_NET_CTRL_VLAN_DEL +command take a 16-bit VLAN id as the command-specific-data. + +Appendix D: Block Device + +The virtio block device is a simple virtual block device (ie. +disk). Read and write requests (and other exotic requests) are +placed in the queue, and serviced (probably out of order) by the +device except where noted. + + Configuration + + Subsystem Device ID 2 + + Virtqueues 0:requestq. + + Feature bits + + VIRTIO_BLK_F_BARRIER (0) Host supports request barriers. + + VIRTIO_BLK_F_SIZE_MAX (1) Maximum size of any single segment is + in “size_max”. + + VIRTIO_BLK_F_SEG_MAX (2) Maximum number of segments in a + request is in “seg_max”. + + VIRTIO_BLK_F_GEOMETRY (4) Disk-style geometry specified in “ + geometry”. + + VIRTIO_BLK_F_RO (5) Device is read-only. + + VIRTIO_BLK_F_BLK_SIZE (6) Block size of disk is in “blk_size”. + + VIRTIO_BLK_F_SCSI (7) Device supports scsi packet commands. + + VIRTIO_BLK_F_FLUSH (9) Cache flush command support. + + + + Device configuration layout The capacity of the device + (expressed in 512-byte sectors) is always present. The + availability of the others all depend on various feature bits + as indicated above. struct virtio_blk_config { + + u64 capacity; + + u32 size_max; + + u32 seg_max; + + struct virtio_blk_geometry { + + u16 cylinders; + + u8 heads; + + u8 sectors; + + } geometry; + + u32 blk_size; + + + +}; + + Device Initialization + + The device size should be read from the “capacity” + configuration field. No requests should be submitted which goes + beyond this limit. + + If the VIRTIO_BLK_F_BLK_SIZE feature is negotiated, the + blk_size field can be read to determine the optimal sector size + for the driver to use. This does not effect the units used in + the protocol (always 512 bytes), but awareness of the correct + value can effect performance. + + If the VIRTIO_BLK_F_RO feature is set by the device, any write + requests will fail. + + + + Device Operation + +The driver queues requests to the virtqueue, and they are used by +the device (not necessarily in order). Each request is of form: + +struct virtio_blk_req { + + + + u32 type; + + u32 ioprio; + + u64 sector; + + char data[][512]; + + u8 status; + +}; + +If the device has VIRTIO_BLK_F_SCSI feature, it can also support +scsi packet command requests, each of these requests is of form:struct virtio_scsi_pc_req { + + u32 type; + + u32 ioprio; + + u64 sector; + + char cmd[]; + + char data[][512]; + +#define SCSI_SENSE_BUFFERSIZE 96 + + u8 sense[SCSI_SENSE_BUFFERSIZE]; + + u32 errors; + + u32 data_len; + + u32 sense_len; + + u32 residual; + + u8 status; + +}; + +The type of the request is either a read (VIRTIO_BLK_T_IN), a +write (VIRTIO_BLK_T_OUT), a scsi packet command +(VIRTIO_BLK_T_SCSI_CMD or VIRTIO_BLK_T_SCSI_CMD_OUT[footnote: +the SCSI_CMD and SCSI_CMD_OUT types are equivalent, the device +does not distinguish between them +]) or a flush (VIRTIO_BLK_T_FLUSH or VIRTIO_BLK_T_FLUSH_OUT[footnote: +the FLUSH and FLUSH_OUT types are equivalent, the device does not +distinguish between them +]). If the device has VIRTIO_BLK_F_BARRIER feature the high bit +(VIRTIO_BLK_T_BARRIER) indicates that this request acts as a +barrier and that all preceeding requests must be complete before +this one, and all following requests must not be started until +this is complete. Note that a barrier does not flush caches in +the underlying backend device in host, and thus does not serve as +data consistency guarantee. Driver must use FLUSH request to +flush the host cache. + +#define VIRTIO_BLK_T_IN 0 + +#define VIRTIO_BLK_T_OUT 1 + +#define VIRTIO_BLK_T_SCSI_CMD 2 + +#define VIRTIO_BLK_T_SCSI_CMD_OUT 3 + +#define VIRTIO_BLK_T_FLUSH 4 + +#define VIRTIO_BLK_T_FLUSH_OUT 5 + +#define VIRTIO_BLK_T_BARRIER 0x80000000 + +The ioprio field is a hint about the relative priorities of +requests to the device: higher numbers indicate more important +requests. + +The sector number indicates the offset (multiplied by 512) where +the read or write is to occur. This field is unused and set to 0 +for scsi packet commands and for flush commands. + +The cmd field is only present for scsi packet command requests, +and indicates the command to perform. This field must reside in a +single, separate read-only buffer; command length can be derived +from the length of this buffer. + +Note that these first three (four for scsi packet commands) +fields are always read-only: the data field is either read-only +or write-only, depending on the request. The size of the read or +write can be derived from the total size of the request buffers. + +The sense field is only present for scsi packet command requests, +and indicates the buffer for scsi sense data. + +The data_len field is only present for scsi packet command +requests, this field is deprecated, and should be ignored by the +driver. Historically, devices copied data length there. + +The sense_len field is only present for scsi packet command +requests and indicates the number of bytes actually written to +the sense buffer. + +The residual field is only present for scsi packet command +requests and indicates the residual size, calculated as data +length - number of bytes actually transferred. + +The final status byte is written by the device: either +VIRTIO_BLK_S_OK for success, VIRTIO_BLK_S_IOERR for host or guest +error or VIRTIO_BLK_S_UNSUPP for a request unsupported by host:#define VIRTIO_BLK_S_OK 0 + +#define VIRTIO_BLK_S_IOERR 1 + +#define VIRTIO_BLK_S_UNSUPP 2 + +Historically, devices assumed that the fields type, ioprio and +sector reside in a single, separate read-only buffer; the fields +errors, data_len, sense_len and residual reside in a single, +separate write-only buffer; the sense field in a separate +write-only buffer of size 96 bytes, by itself; the fields errors, +data_len, sense_len and residual in a single write-only buffer; +and the status field is a separate read-only buffer of size 1 +byte, by itself. + +Appendix E: Console Device + +The virtio console device is a simple device for data input and +output. A device may have one or more ports. Each port has a pair +of input and output virtqueues. Moreover, a device has a pair of +control IO virtqueues. The control virtqueues are used to +communicate information between the device and the driver about +ports being opened and closed on either side of the connection, +indication from the host about whether a particular port is a +console port, adding new ports, port hot-plug/unplug, etc., and +indication from the guest about whether a port or a device was +successfully added, port open/close, etc.. For data IO, one or +more empty buffers are placed in the receive queue for incoming +data and outgoing characters are placed in the transmit queue. + + Configuration + + Subsystem Device ID 3 + + Virtqueues 0:receiveq(port0). 1:transmitq(port0), 2:control + receiveq[footnote: +Ports 2 onwards only if VIRTIO_CONSOLE_F_MULTIPORT is set +], 3:control transmitq, 4:receiveq(port1), 5:transmitq(port1), + ... + + Feature bits + + VIRTIO_CONSOLE_F_SIZE (0) Configuration cols and rows fields + are valid. + + VIRTIO_CONSOLE_F_MULTIPORT(1) Device has support for multiple + ports; configuration fields nr_ports and max_nr_ports are + valid and control virtqueues will be used. + + Device configuration layout The size of the console is supplied + in the configuration space if the VIRTIO_CONSOLE_F_SIZE feature + is set. Furthermore, if the VIRTIO_CONSOLE_F_MULTIPORT feature + is set, the maximum number of ports supported by the device can + be fetched.struct virtio_console_config { + + u16 cols; + + u16 rows; + + + + u32 max_nr_ports; + +}; + + Device Initialization + + If the VIRTIO_CONSOLE_F_SIZE feature is negotiated, the driver + can read the console dimensions from the configuration fields. + + If the VIRTIO_CONSOLE_F_MULTIPORT feature is negotiated, the + driver can spawn multiple ports, not all of which may be + attached to a console. Some could be generic ports. In this + case, the control virtqueues are enabled and according to the + max_nr_ports configuration-space value, the appropriate number + of virtqueues are created. A control message indicating the + driver is ready is sent to the host. The host can then send + control messages for adding new ports to the device. After + creating and initializing each port, a + VIRTIO_CONSOLE_PORT_READY control message is sent to the host + for that port so the host can let us know of any additional + configuration options set for that port. + + The receiveq for each port is populated with one or more + receive buffers. + + Device Operation + + For output, a buffer containing the characters is placed in the + port's transmitq.[footnote: +Because this is high importance and low bandwidth, the current +Linux implementation polls for the buffer to be used, rather than +waiting for an interrupt, simplifying the implementation +significantly. However, for generic serial ports with the +O_NONBLOCK flag set, the polling limitation is relaxed and the +consumed buffers are freed upon the next write or poll call or +when a port is closed or hot-unplugged. +] + + When a buffer is used in the receiveq (signalled by an + interrupt), the contents is the input to the port associated + with the virtqueue for which the notification was received. + + If the driver negotiated the VIRTIO_CONSOLE_F_SIZE feature, a + configuration change interrupt may occur. The updated size can + be read from the configuration fields. + + If the driver negotiated the VIRTIO_CONSOLE_F_MULTIPORT + feature, active ports are announced by the host using the + VIRTIO_CONSOLE_PORT_ADD control message. The same message is + used for port hot-plug as well. + + If the host specified a port `name', a sysfs attribute is + created with the name filled in, so that udev rules can be + written that can create a symlink from the port's name to the + char device for port discovery by applications in the guest. + + Changes to ports' state are effected by control messages. + Appropriate action is taken on the port indicated in the + control message. The layout of the structure of the control + buffer and the events associated are:struct virtio_console_control { + + uint32_t id; /* Port number */ + + uint16_t event; /* The kind of control event */ + + uint16_t value; /* Extra information for the event */ + +}; + + + +/* Some events for the internal messages (control packets) */ + + + +#define VIRTIO_CONSOLE_DEVICE_READY 0 + +#define VIRTIO_CONSOLE_PORT_ADD 1 + +#define VIRTIO_CONSOLE_PORT_REMOVE 2 + +#define VIRTIO_CONSOLE_PORT_READY 3 + +#define VIRTIO_CONSOLE_CONSOLE_PORT 4 + +#define VIRTIO_CONSOLE_RESIZE 5 + +#define VIRTIO_CONSOLE_PORT_OPEN 6 + +#define VIRTIO_CONSOLE_PORT_NAME 7 + +Appendix F: Entropy Device + +The virtio entropy device supplies high-quality randomness for +guest use. + + Configuration + + Subsystem Device ID 4 + + Virtqueues 0:requestq. + + Feature bits None currently defined + + Device configuration layout None currently defined. + + Device Initialization + + The virtqueue is initialized + + Device Operation + +When the driver requires random bytes, it places the descriptor +of one or more buffers in the queue. It will be completely filled +by random data by the device. + +Appendix G: Memory Balloon Device + +The virtio memory balloon device is a primitive device for +managing guest memory: the device asks for a certain amount of +memory, and the guest supplies it (or withdraws it, if the device +has more than it asks for). This allows the guest to adapt to +changes in allowance of underlying physical memory. If the +feature is negotiated, the device can also be used to communicate +guest memory statistics to the host. + + Configuration + + Subsystem Device ID 5 + + Virtqueues 0:inflateq. 1:deflateq. 2:statsq.[footnote: +Only if VIRTIO_BALLON_F_STATS_VQ set +] + + Feature bits + + VIRTIO_BALLOON_F_MUST_TELL_HOST (0) Host must be told before + pages from the balloon are used. + + VIRTIO_BALLOON_F_STATS_VQ (1) A virtqueue for reporting guest + memory statistics is present. + + Device configuration layout Both fields of this configuration + are always available. Note that they are little endian, despite + convention that device fields are guest endian:struct virtio_balloon_config { + + u32 num_pages; + + u32 actual; + +}; + + Device Initialization + + The inflate and deflate virtqueues are identified. + + If the VIRTIO_BALLOON_F_STATS_VQ feature bit is negotiated: + + Identify the stats virtqueue. + + Add one empty buffer to the stats virtqueue and notify the + host. + +Device operation begins immediately. + + Device Operation + + Memory Ballooning The device is driven by the receipt of a + configuration change interrupt. + + The “num_pages” configuration field is examined. If this is + greater than the “actual” number of pages, memory must be given + to the balloon. If it is less than the “actual” number of + pages, memory may be taken back from the balloon for general + use. + + To supply memory to the balloon (aka. inflate): + + The driver constructs an array of addresses of unused memory + pages. These addresses are divided by 4096[footnote: +This is historical, and independent of the guest page size +] and the descriptor describing the resulting 32-bit array is + added to the inflateq. + + To remove memory from the balloon (aka. deflate): + + The driver constructs an array of addresses of memory pages it + has previously given to the balloon, as described above. This + descriptor is added to the deflateq. + + If the VIRTIO_BALLOON_F_MUST_TELL_HOST feature is set, the + guest may not use these requested pages until that descriptor + in the deflateq has been used by the device. + + Otherwise, the guest may begin to re-use pages previously given + to the balloon before the device has acknowledged their + withdrawl. [footnote: +In this case, deflation advice is merely a courtesy +] + + In either case, once the device has completed the inflation or + deflation, the “actual” field of the configuration should be + updated to reflect the new number of pages in the balloon.[footnote: +As updates to configuration space are not atomic, this field +isn't particularly reliable, but can be used to diagnose buggy +guests. +] + + Memory Statistics + +The stats virtqueue is atypical because communication is driven +by the device (not the driver). The channel becomes active at +driver initialization time when the driver adds an empty buffer +and notifies the device. A request for memory statistics proceeds +as follows: + + The device pushes the buffer onto the used ring and sends an + interrupt. + + The driver pops the used buffer and discards it. + + The driver collects memory statistics and writes them into a + new buffer. + + The driver adds the buffer to the virtqueue and notifies the + device. + + The device pops the buffer (retaining it to initiate a + subsequent request) and consumes the statistics. + + Memory Statistics Format Each statistic consists of a 16 bit + tag and a 64 bit value. Both quantities are represented in the + native endian of the guest. All statistics are optional and the + driver may choose which ones to supply. To guarantee backwards + compatibility, unsupported statistics should be omitted. + + struct virtio_balloon_stat { + +#define VIRTIO_BALLOON_S_SWAP_IN 0 + +#define VIRTIO_BALLOON_S_SWAP_OUT 1 + +#define VIRTIO_BALLOON_S_MAJFLT 2 + +#define VIRTIO_BALLOON_S_MINFLT 3 + +#define VIRTIO_BALLOON_S_MEMFREE 4 + +#define VIRTIO_BALLOON_S_MEMTOT 5 + + u16 tag; + + u64 val; + +} __attribute__((packed)); + + Tags + + VIRTIO_BALLOON_S_SWAP_IN The amount of memory that has been + swapped in (in bytes). + + VIRTIO_BALLOON_S_SWAP_OUT The amount of memory that has been + swapped out to disk (in bytes). + + VIRTIO_BALLOON_S_MAJFLT The number of major page faults that + have occurred. + + VIRTIO_BALLOON_S_MINFLT The number of minor page faults that + have occurred. + + VIRTIO_BALLOON_S_MEMFREE The amount of memory not being used + for any purpose (in bytes). + + VIRTIO_BALLOON_S_MEMTOT The total amount of memory available + (in bytes). + -- cgit v1.2.3 From e22a539824e8ddb82c87b4f415165ede82e6ab56 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 15 Aug 2011 10:15:10 +0930 Subject: lguest: allow booting guest with CONFIG_RELOCATABLE=y The CONFIG_RELOCATABLE code tries to align the unpack destination to the value of 'kernel_alignment' in the setup_hdr. If that's 0, it tries to unpack to address 0, which in fact causes the gunzip code to call 'error("Out of memory while allocating output buffer")'. The bootloader (ie. the lguest Launcher in this case) should be doing setting this field; the normal bzImage is 16M, we can use the same. Reported-by: Stefanos Geraggelos Signed-off-by: Rusty Russell Cc: stable@kernel.org --- Documentation/virtual/lguest/lguest.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index 043bd7df3139..d928c134dee6 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -1996,6 +1996,9 @@ int main(int argc, char *argv[]) /* We use a simple helper to copy the arguments separated by spaces. */ concat((char *)(boot + 1), argv+optind+2); + /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */ + boot->hdr.kernel_alignment = 0x1000000; + /* Boot protocol version: 2.07 supports the fields for lguest. */ boot->hdr.version = 0x207; -- cgit v1.2.3 From eade7b281c9fc18401b989c77d5e5e660b25a3b7 Mon Sep 17 00:00:00 2001 From: Daniel T Chen Date: Sun, 14 Aug 2011 22:43:01 -0400 Subject: ALSA: ac97: Add HP Compaq dc5100 SFF(PT003AW) to Headphone Jack Sense whitelist BugLink: https://bugs.launchpad.net/bugs/826081 The original reporter needs 'Headphone Jack Sense' enabled to have audible audio, so add his PCI SSID to the whitelist. Reported-and-tested-by: Muhammad Khurram Khan Cc: Signed-off-by: Daniel T Chen Signed-off-by: Takashi Iwai --- sound/pci/ac97/ac97_patch.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c index 200c9a1d48b7..a872d0a82976 100644 --- a/sound/pci/ac97/ac97_patch.c +++ b/sound/pci/ac97/ac97_patch.c @@ -1909,6 +1909,7 @@ static unsigned int ad1981_jacks_whitelist[] = { 0x103c0944, /* HP nc6220 */ 0x103c0934, /* HP nc8220 */ 0x103c006d, /* HP nx9105 */ + 0x103c300d, /* HP Compaq dc5100 SFF(PT003AW) */ 0x17340088, /* FSC Scenic-W */ 0 /* end */ }; -- cgit v1.2.3 From d5811e8731213f80c80d89e980505052f16aca1c Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Sat, 13 Aug 2011 13:36:13 -0400 Subject: drm/radeon/kms: don't try to be smart in the hpd handler Attempting to try and turn off disconnected display hw in the hotput handler lead to more problems than it helped. For now just register an event and only attempt the do something interesting with DP. Other connectors are just too problematic: - Some systems have an HPD pin assigned to LVDS, but it's rarely if ever connected properly and we don't really care about hpd events on LVDS anyway since it's always connected. - The HPD pin is wired up correctly for eDP, but we don't really have to do anything since the events since it's always connected. - Some HPD pins fire more than once when you connect/disconnect - etc. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=39882 Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/atombios_dp.c | 12 ++++++++++++ drivers/gpu/drm/radeon/radeon_connectors.c | 14 ++++++-------- drivers/gpu/drm/radeon/radeon_mode.h | 1 + 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c index 645b84b3d203..7ad43c6b1db7 100644 --- a/drivers/gpu/drm/radeon/atombios_dp.c +++ b/drivers/gpu/drm/radeon/atombios_dp.c @@ -613,6 +613,18 @@ static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector, return true; } +bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector) +{ + u8 link_status[DP_LINK_STATUS_SIZE]; + struct radeon_connector_atom_dig *dig = radeon_connector->con_priv; + + if (!radeon_dp_get_link_status(radeon_connector, link_status)) + return false; + if (dp_channel_eq_ok(link_status, dig->dp_lane_count)) + return false; + return true; +} + struct radeon_dp_link_train_info { struct radeon_device *rdev; struct drm_encoder *encoder; diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 441e07054853..7f65940f918f 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -64,18 +64,16 @@ void radeon_connector_hotplug(struct drm_connector *connector) if (connector->dpms != DRM_MODE_DPMS_ON) return; - /* powering up/down the eDP panel generates hpd events which - * can interfere with modesetting. - */ - if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) - return; + /* just deal with DP (not eDP) here. */ + if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) { + int saved_dpms = connector->dpms; - /* pre-r600 did not always have the hpd pins mapped accurately to connectors */ - if (rdev->family >= CHIP_R600) { - if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) + if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd) && + radeon_dp_needs_link_train(radeon_connector)) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON); else drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); + connector->dpms = saved_dpms; } } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index d09031c03e26..68820f5f6303 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -479,6 +479,7 @@ extern void radeon_dp_set_link_config(struct drm_connector *connector, struct drm_display_mode *mode); extern void radeon_dp_link_train(struct drm_encoder *encoder, struct drm_connector *connector); +extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector); extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector); extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector); extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode); -- cgit v1.2.3 From 2a004c686e7997ddb795dbce10b263e241f9bdaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 17 Jun 2011 11:09:07 +0100 Subject: ARM: 6965/1: ep93xx: add model detection for ts-7300 and ts-7400 boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Petr Štetiar Signed-off-by: Russell King --- arch/arm/mach-ep93xx/include/mach/ts72xx.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h index 0eabec62cd9d..ee7f87589efa 100644 --- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h +++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h @@ -20,6 +20,8 @@ #define TS72XX_MODEL_TS7200 0x00 #define TS72XX_MODEL_TS7250 0x01 #define TS72XX_MODEL_TS7260 0x02 +#define TS72XX_MODEL_TS7300 0x03 +#define TS72XX_MODEL_TS7400 0x04 #define TS72XX_OPTIONS_PHYS_BASE 0x22400000 @@ -66,6 +68,16 @@ static inline int board_is_ts7260(void) return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260; } +static inline int board_is_ts7300(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7300; +} + +static inline int board_is_ts7400(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7400; +} + static inline int is_max197_installed(void) { return !!(__raw_readb(TS72XX_OPTIONS_VIRT_BASE) & -- cgit v1.2.3 From 505ed6fd82608bd4f26d487220ec40a3c5d0dded Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Fri, 17 Jun 2011 11:11:59 +0100 Subject: ARM: 6967/1: ep93xx: ts72xx: fix board model detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the obvious error in board detection logic, because according to the TS's manual, the model is stored in the least three significant bits. For example the byte read on my ts-7300 is 0x23 and the detection then fails. Cc: Ryan Mallon Acked-by: H Hartley Sweeten Signed-off-by: Petr Štetiar Signed-off-by: Russell King --- arch/arm/mach-ep93xx/include/mach/ts72xx.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-ep93xx/include/mach/ts72xx.h b/arch/arm/mach-ep93xx/include/mach/ts72xx.h index ee7f87589efa..f1397a13e76b 100644 --- a/arch/arm/mach-ep93xx/include/mach/ts72xx.h +++ b/arch/arm/mach-ep93xx/include/mach/ts72xx.h @@ -6,7 +6,7 @@ * TS72xx memory map: * * virt phys size - * febff000 22000000 4K model number register + * febff000 22000000 4K model number register (bits 0-2) * febfe000 22400000 4K options register * febfd000 22800000 4K options register #2 * febf9000 10800000 4K TS-5620 RTC index register @@ -22,6 +22,7 @@ #define TS72XX_MODEL_TS7260 0x02 #define TS72XX_MODEL_TS7300 0x03 #define TS72XX_MODEL_TS7400 0x04 +#define TS72XX_MODEL_MASK 0x07 #define TS72XX_OPTIONS_PHYS_BASE 0x22400000 @@ -53,29 +54,34 @@ #ifndef __ASSEMBLY__ +static inline int ts72xx_model(void) +{ + return __raw_readb(TS72XX_MODEL_VIRT_BASE) & TS72XX_MODEL_MASK; +} + static inline int board_is_ts7200(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7200; + return ts72xx_model() == TS72XX_MODEL_TS7200; } static inline int board_is_ts7250(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7250; + return ts72xx_model() == TS72XX_MODEL_TS7250; } static inline int board_is_ts7260(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7260; + return ts72xx_model() == TS72XX_MODEL_TS7260; } static inline int board_is_ts7300(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7300; + return ts72xx_model() == TS72XX_MODEL_TS7300; } static inline int board_is_ts7400(void) { - return __raw_readb(TS72XX_MODEL_VIRT_BASE) == TS72XX_MODEL_TS7400; + return ts72xx_model() == TS72XX_MODEL_TS7400; } static inline int is_max197_installed(void) -- cgit v1.2.3 From 43c734be5571a4daad9f0a3e0b3229a1c0049917 Mon Sep 17 00:00:00 2001 From: Srinivas Kandagatla Date: Mon, 15 Aug 2011 10:43:44 +0100 Subject: ARM: 7014/1: cache-l2x0: Fix L2 Cache size calculation. This patch fixes L2 Cache size calculations for L2C-210, L2C-310 and PL310, by changing the L2X0_AUX_CTRL_WAY_SIZE_MASK from 2 bits to 3 bits. The Auxiliary Control Register for L2C-210, L2C-310 and PL310 has 3bits [19:17] for Way size, however the existing code only uses 2 bits to get this value. This results in incorrect cachesize calculations. It also results in performing operations on the whole cache when we erroneously decide that the range is big enough (due to l2x0_size being too small) and also prints incorrect cachesize. Signed-off-by: Srinivas Kandagatla Acked-by: Will Deacon Cc: stable@kernel.org Signed-off-by: Russell King --- arch/arm/include/asm/hardware/cache-l2x0.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/hardware/cache-l2x0.h b/arch/arm/include/asm/hardware/cache-l2x0.h index 16bd48031583..bfa706ffd968 100644 --- a/arch/arm/include/asm/hardware/cache-l2x0.h +++ b/arch/arm/include/asm/hardware/cache-l2x0.h @@ -64,7 +64,7 @@ #define L2X0_AUX_CTRL_MASK 0xc0000fff #define L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT 16 #define L2X0_AUX_CTRL_WAY_SIZE_SHIFT 17 -#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x3 << 17) +#define L2X0_AUX_CTRL_WAY_SIZE_MASK (0x7 << 17) #define L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT 22 #define L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT 26 #define L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT 27 -- cgit v1.2.3 From 145e10e173c8adf4804334fb0dd10028300a7a7a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 15 Aug 2011 11:04:41 +0100 Subject: ARM: 7015/1: ARM errata: Possible cache data corruption with hit-under-miss enabled This patch is a workaround for the 364296 ARM1136 r0p2 erratum (possible cache data corruption with hit-under-miss enabled). It sets the undocumented bit 31 in the auxiliary control register and the FI bit in the control register, thus disabling hit-under-miss without putting the processor into full low interrupt latency mode. Signed-off-by: Catalin Marinas Tested-by: Siarhei Siamashka Signed-off-by: Russell King --- arch/arm/Kconfig | 12 ++++++++++++ arch/arm/mm/proc-v6.S | 16 ++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5ebc5d922ea1..3269576dbfa8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1271,6 +1271,18 @@ config ARM_ERRATA_754327 This workaround defines cpu_relax() as smp_mb(), preventing correctly written polling loops from denying visibility of updates to memory. +config ARM_ERRATA_364296 + bool "ARM errata: Possible cache data corruption with hit-under-miss enabled" + depends on CPU_V6 && !SMP + help + This options enables the workaround for the 364296 ARM1136 + r0p2 erratum (possible cache data corruption with + hit-under-miss enabled). It sets the undocumented bit 31 in + the auxiliary control register and the FI bit in the control + register, thus disabling hit-under-miss without putting the + processor into full low interrupt latency mode. ARM11MPCore + is not affected. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index 219138d2f158..a923aa0fd00d 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -223,6 +223,22 @@ __v6_setup: mrc p15, 0, r0, c1, c0, 0 @ read control register bic r0, r0, r5 @ clear bits them orr r0, r0, r6 @ set them +#ifdef CONFIG_ARM_ERRATA_364296 + /* + * Workaround for the 364296 ARM1136 r0p2 erratum (possible cache data + * corruption with hit-under-miss enabled). The conditional code below + * (setting the undocumented bit 31 in the auxiliary control register + * and the FI bit in the control register) disables hit-under-miss + * without putting the processor into full low interrupt latency mode. + */ + ldr r6, =0x4107b362 @ id for ARM1136 r0p2 + mrc p15, 0, r5, c0, c0, 0 @ get processor id + teq r5, r6 @ check for the faulty core + mrceq p15, 0, r5, c1, c0, 1 @ load aux control reg + orreq r5, r5, #(1 << 31) @ set the undocumented bit 31 + mcreq p15, 0, r5, c1, c0, 1 @ write aux control reg + orreq r0, r0, #(1 << 21) @ low interrupt latency configuration +#endif mov pc, lr @ return to head.S:__ret /* -- cgit v1.2.3 From d2b4c7bd7eabfaa2e3e5b8107d5eeb56ac879813 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 13 Aug 2011 19:15:01 +0800 Subject: ASoC: soc-jack: Fix checking return value of request_any_context_irq request_any_context_irq() returns a negative value on failure. On success, it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED. Signed-off-by: Axel Lin Signed-off-by: Mark Brown Cc: stable@kernel.orG --- sound/soc/soc-jack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-jack.c b/sound/soc/soc-jack.c index 7c17b98d5846..38b00131b2fe 100644 --- a/sound/soc/soc-jack.c +++ b/sound/soc/soc-jack.c @@ -327,7 +327,7 @@ int snd_soc_jack_add_gpios(struct snd_soc_jack *jack, int count, IRQF_TRIGGER_FALLING, gpios[i].name, &gpios[i]); - if (ret) + if (ret < 0) goto err; if (gpios[i].wake) { -- cgit v1.2.3 From 161d55c3ec4c7e26c96b11dc86caea0b3c9c6b0f Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 13 Aug 2011 11:33:08 +0800 Subject: ASoC: sta32x: Fix a memory leak if snd_soc_register_codec fails Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- sound/soc/codecs/sta32x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/sta32x.c b/sound/soc/codecs/sta32x.c index 409d89d1f34c..fbd7eb9e61ce 100644 --- a/sound/soc/codecs/sta32x.c +++ b/sound/soc/codecs/sta32x.c @@ -857,6 +857,7 @@ static __devinit int sta32x_i2c_probe(struct i2c_client *i2c, ret = snd_soc_register_codec(&i2c->dev, &sta32x_codec, &sta32x_dai, 1); if (ret != 0) { dev_err(&i2c->dev, "Failed to register codec (%d)\n", ret); + kfree(sta32x); return ret; } -- cgit v1.2.3 From bf545ed72f2eeac664695a8ea2199d9ddaef6020 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:10 -0400 Subject: ASoC: ad193x: fix registers definition fix dac word len mask and adc tdm fmt shift value Signed-off-by: Scott Jiang Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/ad193x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h index 9747b5497877..c1029d242c84 100644 --- a/sound/soc/codecs/ad193x.h +++ b/sound/soc/codecs/ad193x.h @@ -34,7 +34,7 @@ #define AD193X_DAC_LEFT_HIGH (1 << 3) #define AD193X_DAC_BCLK_INV (1 << 7) #define AD193X_DAC_CTRL2 0x804 -#define AD193X_DAC_WORD_LEN_MASK 0xC +#define AD193X_DAC_WORD_LEN_MASK 0x18 #define AD193X_DAC_MASTER_MUTE 1 #define AD193X_DAC_CHNL_MUTE 0x805 #define AD193X_DACL1_MUTE 0 @@ -63,7 +63,7 @@ #define AD193X_ADC_CTRL1 0x80f #define AD193X_ADC_SERFMT_MASK 0x60 #define AD193X_ADC_SERFMT_STEREO (0 << 5) -#define AD193X_ADC_SERFMT_TDM (1 << 2) +#define AD193X_ADC_SERFMT_TDM (1 << 5) #define AD193X_ADC_SERFMT_AUX (2 << 5) #define AD193X_ADC_WORD_LEN_MASK 0x3 #define AD193X_ADC_CTRL2 0x810 -- cgit v1.2.3 From 95c93d8525ebce1024bda7316f602ae45c36cd6f Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:11 -0400 Subject: ASoC: ad193x: fix dac word len setting dac word len value should left shift before setting Signed-off-by: Scott Jiang Acked-by: Barry Song <21cnbao@gmail.com> Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/codecs/ad193x.c | 3 ++- sound/soc/codecs/ad193x.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c index 2374ca5ffe68..f1a8be58255b 100644 --- a/sound/soc/codecs/ad193x.c +++ b/sound/soc/codecs/ad193x.c @@ -307,7 +307,8 @@ static int ad193x_hw_params(struct snd_pcm_substream *substream, snd_soc_write(codec, AD193X_PLL_CLK_CTRL0, reg); reg = snd_soc_read(codec, AD193X_DAC_CTRL2); - reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) | word_len; + reg = (reg & (~AD193X_DAC_WORD_LEN_MASK)) + | (word_len << AD193X_DAC_WORD_LEN_SHFT); snd_soc_write(codec, AD193X_DAC_CTRL2, reg); reg = snd_soc_read(codec, AD193X_ADC_CTRL1); diff --git a/sound/soc/codecs/ad193x.h b/sound/soc/codecs/ad193x.h index c1029d242c84..cccc2e8e5fbd 100644 --- a/sound/soc/codecs/ad193x.h +++ b/sound/soc/codecs/ad193x.h @@ -34,6 +34,7 @@ #define AD193X_DAC_LEFT_HIGH (1 << 3) #define AD193X_DAC_BCLK_INV (1 << 7) #define AD193X_DAC_CTRL2 0x804 +#define AD193X_DAC_WORD_LEN_SHFT 3 #define AD193X_DAC_WORD_LEN_MASK 0x18 #define AD193X_DAC_MASTER_MUTE 1 #define AD193X_DAC_CHNL_MUTE 0x805 -- cgit v1.2.3 From 25ea524bed0202f823a0adcbbda68e86a22e3670 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:12 -0400 Subject: ASoC: ad193x: fix system clock system clock is 24.576MHz instead of 12.288MHz Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/blackfin/bf5xx-ad193x.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/blackfin/bf5xx-ad193x.c b/sound/soc/blackfin/bf5xx-ad193x.c index d6651c033cb7..a118a0fb9d81 100644 --- a/sound/soc/blackfin/bf5xx-ad193x.c +++ b/sound/soc/blackfin/bf5xx-ad193x.c @@ -56,7 +56,7 @@ static int bf5xx_ad193x_hw_params(struct snd_pcm_substream *substream, switch (params_rate(params)) { case 48000: - clk = 12288000; + clk = 24576000; break; } -- cgit v1.2.3 From 396a2e79cdbd562bf7ea48132f8d3ba8304109b2 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:13 -0400 Subject: ASoC: Add spi hw read function for 16 addr 8 data mode for ad193x fix [This will be used by the ad193x driver to fix the fact that the original author of the driver put a bodge for their particular chip into a the generic ASoC register I/O abstraction layer which looked like an obvious bug which ended up getting fixed in 3.0. Sadly there were no comments documenting what was going on. A minimally invasive correction to the driver is to remove the register cache support and go direct to the hardware all the time so we're adding a new feature -- broonie] Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/soc-io.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sound/soc/soc-io.c b/sound/soc/soc-io.c index cca490c80589..a62f7dd4ba96 100644 --- a/sound/soc/soc-io.c +++ b/sound/soc/soc-io.c @@ -205,6 +205,25 @@ static unsigned int snd_soc_16_8_read_i2c(struct snd_soc_codec *codec, #define snd_soc_16_8_read_i2c NULL #endif +#if defined(CONFIG_SPI_MASTER) +static unsigned int snd_soc_16_8_read_spi(struct snd_soc_codec *codec, + unsigned int r) +{ + struct spi_device *spi = codec->control_data; + + const u16 reg = cpu_to_be16(r | 0x100); + u8 data; + int ret; + + ret = spi_write_then_read(spi, ®, 2, &data, 1); + if (ret < 0) + return 0; + return data; +} +#else +#define snd_soc_16_8_read_spi NULL +#endif + static int snd_soc_16_8_write(struct snd_soc_codec *codec, unsigned int reg, unsigned int value) { @@ -295,6 +314,7 @@ static struct { int (*write)(struct snd_soc_codec *codec, unsigned int, unsigned int); unsigned int (*read)(struct snd_soc_codec *, unsigned int); unsigned int (*i2c_read)(struct snd_soc_codec *, unsigned int); + unsigned int (*spi_read)(struct snd_soc_codec *, unsigned int); } io_types[] = { { .addr_bits = 4, .data_bits = 12, @@ -318,6 +338,7 @@ static struct { .addr_bits = 16, .data_bits = 8, .write = snd_soc_16_8_write, .i2c_read = snd_soc_16_8_read_i2c, + .spi_read = snd_soc_16_8_read_spi, }, { .addr_bits = 16, .data_bits = 16, @@ -383,6 +404,8 @@ int snd_soc_codec_set_cache_io(struct snd_soc_codec *codec, #ifdef CONFIG_SPI_MASTER codec->hw_write = do_spi_write; #endif + if (io_types[i].spi_read) + codec->hw_read = io_types[i].spi_read; codec->control_data = container_of(codec->dev, struct spi_device, -- cgit v1.2.3 From 0cc62e926324d4f3bd02d378baafbe73164fca35 Mon Sep 17 00:00:00 2001 From: Scott Jiang Date: Fri, 12 Aug 2011 18:04:14 -0400 Subject: ASoC: ad193x: remove cache support asoc cache layer can't support this kind of spi registers well. remove cache support and read/write registers directly Signed-off-by: Scott Jiang Signed-off-by: Mark Brown --- sound/soc/codecs/ad193x.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sound/soc/codecs/ad193x.c b/sound/soc/codecs/ad193x.c index f1a8be58255b..eedb6f5e5823 100644 --- a/sound/soc/codecs/ad193x.c +++ b/sound/soc/codecs/ad193x.c @@ -27,11 +27,6 @@ struct ad193x_priv { int sysclk; }; -/* ad193x register cache & default register settings */ -static const u8 ad193x_reg[AD193X_NUM_REGS] = { - 0, 0, 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0, 0, 0, -}; - /* * AD193X volume/mute/de-emphasis etc. controls */ @@ -390,9 +385,6 @@ static int ad193x_probe(struct snd_soc_codec *codec) static struct snd_soc_codec_driver soc_codec_dev_ad193x = { .probe = ad193x_probe, - .reg_cache_default = ad193x_reg, - .reg_cache_size = AD193X_NUM_REGS, - .reg_word_size = sizeof(u16), }; #if defined(CONFIG_SPI_MASTER) -- cgit v1.2.3 From 8a9af4fdf6d5eeb3200a088354d266a87e8260b0 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Tue, 9 Aug 2011 16:31:54 -0700 Subject: USB: Avoid NULL pointer deref in usb_hcd_alloc_bandwidth. usb_ifnum_to_if() can return NULL if the USB device does not have a configuration installed (usb_device->actconfig == NULL), or if we can't find the interface number in the installed configuration. Return an error instead of crashing. Signed-off-by: Sarah Sharp --- drivers/usb/core/hcd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 8669ba3fe794..73cbbd85219f 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1775,6 +1775,8 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_interface *iface = usb_ifnum_to_if(udev, cur_alt->desc.bInterfaceNumber); + if (!iface) + return -EINVAL; if (iface->resetting_device) { /* * The USB core just reset the device, so the xHCI host -- cgit v1.2.3 From 75f25bd31d9315ab57e4fb5eba3340452febc48d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Wed, 3 Aug 2011 13:17:01 +0800 Subject: cpupower: avoid using symlinks Reference the source directly, don't create symlinks. Signed-off-by: WANG Cong Signed-off-by: Dominik Brodowski --- tools/power/cpupower/debug/x86_64/Makefile | 8 ++++---- tools/power/cpupower/debug/x86_64/centrino-decode.c | 1 - tools/power/cpupower/debug/x86_64/powernow-k8-decode.c | 1 - 3 files changed, 4 insertions(+), 6 deletions(-) delete mode 120000 tools/power/cpupower/debug/x86_64/centrino-decode.c delete mode 120000 tools/power/cpupower/debug/x86_64/powernow-k8-decode.c diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile index dbf13998462a..3326217dd311 100644 --- a/tools/power/cpupower/debug/x86_64/Makefile +++ b/tools/power/cpupower/debug/x86_64/Makefile @@ -1,10 +1,10 @@ default: all -centrino-decode: centrino-decode.c - $(CC) $(CFLAGS) -o centrino-decode centrino-decode.c +centrino-decode: ../i386/centrino-decode.c + $(CC) $(CFLAGS) -o $@ $< -powernow-k8-decode: powernow-k8-decode.c - $(CC) $(CFLAGS) -o powernow-k8-decode powernow-k8-decode.c +powernow-k8-decode: ../i386/powernow-k8-decode.c + $(CC) $(CFLAGS) -o $@ $< all: centrino-decode powernow-k8-decode diff --git a/tools/power/cpupower/debug/x86_64/centrino-decode.c b/tools/power/cpupower/debug/x86_64/centrino-decode.c deleted file mode 120000 index 26fb3f1d8fc7..000000000000 --- a/tools/power/cpupower/debug/x86_64/centrino-decode.c +++ /dev/null @@ -1 +0,0 @@ -../i386/centrino-decode.c \ No newline at end of file diff --git a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c b/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c deleted file mode 120000 index eb30c79cf9df..000000000000 --- a/tools/power/cpupower/debug/x86_64/powernow-k8-decode.c +++ /dev/null @@ -1 +0,0 @@ -../i386/powernow-k8-decode.c \ No newline at end of file -- cgit v1.2.3 From 2dfc818b35cbea59188cc86e86e0a0efce2b0dbe Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:35 +0200 Subject: cpupower: mperf monitor - Use TSC to calculate max frequency if possible Which makes the implementation independent from cpufreq drivers. Therefore this would also work on a Xen kernel where the hypervisor is doing frequency switching and idle entering. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 2 +- .../cpupower/utils/idle_monitor/mperf_monitor.c | 177 +++++++++++++++------ 2 files changed, 131 insertions(+), 48 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 94c2cf0a98b8..11521d2f0a4c 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -24,7 +24,7 @@ # Set the following to `true' to make a unstripped, unoptimized # binary. Leave this set to `false' for production use. -DEBUG ?= false +DEBUG ?= true # make the build silent. Set this to something else to make it noisy again. V ?= false diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 63ca87a05e5f..5650ab5a2c20 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -22,12 +22,15 @@ #define MSR_TSC 0x10 +#define MSR_AMD_HWCR 0xc0010015 + enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT }; static int mperf_get_count_percent(unsigned int self_id, double *percent, unsigned int cpu); static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu); +static struct timespec time_start, time_end; static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { { @@ -54,19 +57,33 @@ static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = { }, }; +enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF }; +static int max_freq_mode; +/* + * The max frequency mperf is ticking at (in C0), either retrieved via: + * 1) calculated after measurements if we know TSC ticks at mperf/P0 frequency + * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time + * 1. Is preferred as it also works without cpufreq subsystem (e.g. on Xen) + */ +static unsigned long max_frequency; + static unsigned long long tsc_at_measure_start; static unsigned long long tsc_at_measure_end; -static unsigned long max_frequency; static unsigned long long *mperf_previous_count; static unsigned long long *aperf_previous_count; static unsigned long long *mperf_current_count; static unsigned long long *aperf_current_count; + /* valid flag for all CPUs. If a MSR read failed it will be zero */ static int *is_valid; static int mperf_get_tsc(unsigned long long *tsc) { - return read_msr(0, MSR_TSC, tsc); + int ret; + ret = read_msr(0, MSR_TSC, tsc); + if (ret) + dprint("Reading TSC MSR failed, returning %llu\n", *tsc); + return ret; } static int mperf_init_stats(unsigned int cpu) @@ -97,36 +114,11 @@ static int mperf_measure_stats(unsigned int cpu) return 0; } -/* - * get_average_perf() - * - * Returns the average performance (also considers boosted frequencies) - * - * Input: - * aperf_diff: Difference of the aperf register over a time period - * mperf_diff: Difference of the mperf register over the same time period - * max_freq: Maximum frequency (P0) - * - * Returns: - * Average performance over the time period - */ -static unsigned long get_average_perf(unsigned long long aperf_diff, - unsigned long long mperf_diff) -{ - unsigned int perf_percent = 0; - if (((unsigned long)(-1) / 100) < aperf_diff) { - int shift_count = 7; - aperf_diff >>= shift_count; - mperf_diff >>= shift_count; - } - perf_percent = (aperf_diff * 100) / mperf_diff; - return (max_frequency * perf_percent) / 100; -} - static int mperf_get_count_percent(unsigned int id, double *percent, unsigned int cpu) { unsigned long long aperf_diff, mperf_diff, tsc_diff; + unsigned long long timediff; if (!is_valid[cpu]) return -1; @@ -136,11 +128,19 @@ static int mperf_get_count_percent(unsigned int id, double *percent, mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; - tsc_diff = tsc_at_measure_end - tsc_at_measure_start; - *percent = 100.0 * mperf_diff / tsc_diff; - dprint("%s: mperf_diff: %llu, tsc_diff: %llu\n", - mperf_cstates[id].name, mperf_diff, tsc_diff); + if (max_freq_mode == MAX_FREQ_TSC_REF) { + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + *percent = 100.0 * mperf_diff / tsc_diff; + dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, tsc_diff); + } else if (max_freq_mode == MAX_FREQ_SYSFS) { + timediff = timespec_diff_us(time_start, time_end); + *percent = 100.0 * mperf_diff / timediff; + dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", + mperf_cstates[id].name, mperf_diff, timediff); + } else + return -1; if (id == Cx) *percent = 100.0 - *percent; @@ -154,7 +154,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, static int mperf_get_count_freq(unsigned int id, unsigned long long *count, unsigned int cpu) { - unsigned long long aperf_diff, mperf_diff; + unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff; if (id != AVG_FREQ) return 1; @@ -165,11 +165,21 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu]; aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu]; - /* Return MHz for now, might want to return KHz if column width is more - generic */ - *count = get_average_perf(aperf_diff, mperf_diff) / 1000; - dprint("%s: %llu\n", mperf_cstates[id].name, *count); + if (max_freq_mode == MAX_FREQ_TSC_REF) { + /* Calculate max_freq from TSC count */ + tsc_diff = tsc_at_measure_end - tsc_at_measure_start; + time_diff = timespec_diff_us(time_start, time_end); + max_frequency = tsc_diff / time_diff; + } + *count = max_frequency * ((double)aperf_diff / mperf_diff); + dprint("%s: Average freq based on %s maximum frequency:\n", + mperf_cstates[id].name, + (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read"); + dprint("%max_frequency: %lu", max_frequency); + dprint("aperf_diff: %llu\n", aperf_diff); + dprint("mperf_diff: %llu\n", mperf_diff); + dprint("avg freq: %llu\n", *count); return 0; } @@ -178,6 +188,7 @@ static int mperf_start(void) int cpu; unsigned long long dbg; + clock_gettime(CLOCK_REALTIME, &time_start); mperf_get_tsc(&tsc_at_measure_start); for (cpu = 0; cpu < cpu_count; cpu++) @@ -193,32 +204,104 @@ static int mperf_stop(void) unsigned long long dbg; int cpu; - mperf_get_tsc(&tsc_at_measure_end); - for (cpu = 0; cpu < cpu_count; cpu++) mperf_measure_stats(cpu); + mperf_get_tsc(&tsc_at_measure_end); + clock_gettime(CLOCK_REALTIME, &time_end); + mperf_get_tsc(&dbg); dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end); return 0; } -struct cpuidle_monitor mperf_monitor; - -struct cpuidle_monitor *mperf_register(void) +/* + * Mperf register is defined to tick at P0 (maximum) frequency + * + * Instead of reading out P0 which can be tricky to read out from HW, + * we use TSC counter if it reliably ticks at P0/mperf frequency. + * + * Still try to fall back to: + * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq + * on older Intel HW without invariant TSC feature. + * Or on AMD machines where TSC does not tick at P0 (do not exist yet, but + * it's still double checked (MSR_AMD_HWCR)). + * + * On these machines the user would still get useful mperf + * stats when acpi-cpufreq driver is loaded. + */ +static int init_maxfreq_mode(void) { + int ret; + unsigned long long hwcr; unsigned long min; - if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) - return NULL; - - /* Assume min/max all the same on all cores */ + if (!cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC) + goto use_sysfs; + + if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) { + /* MSR_AMD_HWCR tells us whether TSC runs at P0/mperf + * freq. + * A test whether hwcr is accessable/available would be: + * (cpupower_cpu_info.family > 0x10 || + * cpupower_cpu_info.family == 0x10 && + * cpupower_cpu_info.model >= 0x2)) + * This should be the case for all aperf/mperf + * capable AMD machines and is therefore safe to test here. + * Compare with Linus kernel git commit: acf01734b1747b1ec4 + */ + ret = read_msr(0, MSR_AMD_HWCR, &hwcr); + /* + * If the MSR read failed, assume a Xen system that did + * not explicitly provide access to it and assume TSC works + */ + if (ret != 0) { + dprint("TSC read 0x%x failed - assume TSC working\n", + MSR_AMD_HWCR); + return 0; + } else if (1 & (hwcr >> 24)) { + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } else { /* Use sysfs max frequency if available */ } + } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) { + /* + * On Intel we assume mperf (in C0) is ticking at same + * rate than TSC + */ + max_freq_mode = MAX_FREQ_TSC_REF; + return 0; + } +use_sysfs: if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) { dprint("Cannot retrieve max freq from cpufreq kernel " "subsystem\n"); - return NULL; + return -1; } + max_freq_mode = MAX_FREQ_SYSFS; + return 0; +} + +/* + * This monitor provides: + * + * 1) Average frequency a CPU resided in + * This always works if the CPU has aperf/mperf capabilities + * + * 2) C0 and Cx (any sleep state) time a CPU resided in + * Works if mperf timer stops ticking in sleep states which + * seem to be the case on all current HW. + * Both is directly retrieved from HW registers and is independent + * from kernel statistics. + */ +struct cpuidle_monitor mperf_monitor; +struct cpuidle_monitor *mperf_register(void) +{ + if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF)) + return NULL; + + if (init_maxfreq_mode()) + return NULL; /* Free this at program termination */ is_valid = calloc(cpu_count, sizeof(int)); -- cgit v1.2.3 From 88f984e0e235f82a5d34f4a99244eeb14e1413e0 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:36 +0200 Subject: cpupower: Do not show an empty Idle_Stats monitor if no idle driver is available By taking error values of: sysfs_get_idlestate_count(..); into account. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c index d048b96a6155..bcd22a1a3970 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c +++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c @@ -134,7 +134,7 @@ static struct cpuidle_monitor *cpuidle_register(void) /* Assume idle state count is the same for all CPUs */ cpuidle_sysfs_monitor.hw_states_num = sysfs_get_idlestate_count(0); - if (cpuidle_sysfs_monitor.hw_states_num == 0) + if (cpuidle_sysfs_monitor.hw_states_num <= 0) return NULL; for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) { -- cgit v1.2.3 From 7c74d2bc5a9d43d33d6f16c1e706147162e2bc52 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:37 +0200 Subject: cpupower: Better detect offlined CPUs Before, checking for offlined CPUs was done dirty and it was checked whether topology parsing returned -1 values. But this is a valid case on a Xen (and possibly other) kernels. Do proper online/offline checking, also take CONFIG_HOTPLUG_CPU option into account (no /sys/devices/../cpuX/online file). Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/helpers/helpers.h | 3 ++ tools/power/cpupower/utils/helpers/sysfs.c | 50 ++++++++++++++++++++++ tools/power/cpupower/utils/helpers/sysfs.h | 2 + tools/power/cpupower/utils/helpers/topology.c | 5 ++- .../cpupower/utils/idle_monitor/cpupower-monitor.c | 10 +++-- 5 files changed, 66 insertions(+), 4 deletions(-) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 592ee362b877..7a83022733b2 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -96,6 +96,9 @@ struct cpupower_topology { int pkg; int core; int cpu; + + /* flags */ + unsigned int is_online:1; } *core_info; }; diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c index 55e2466674c6..c6343024a611 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.c +++ b/tools/power/cpupower/utils/helpers/sysfs.c @@ -56,6 +56,56 @@ static unsigned int sysfs_write_file(const char *path, return (unsigned int) numwrite; } +/* + * Detect whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * negative errno values in error case + */ +int sysfs_is_cpu_online(unsigned int cpu) +{ + char path[SYSFS_PATH_MAX]; + int fd; + ssize_t numread; + unsigned long long value; + char linebuf[MAX_LINE_LEN]; + char *endp; + struct stat statbuf; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu); + + if (stat(path, &statbuf) != 0) + return 0; + + /* + * kernel without CONFIG_HOTPLUG_CPU + * -> cpuX directory exists, but not cpuX/online file + */ + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu); + if (stat(path, &statbuf) != 0) + return 1; + + fd = open(path, O_RDONLY); + if (fd == -1) + return -errno; + + numread = read(fd, linebuf, MAX_LINE_LEN - 1); + if (numread < 1) { + close(fd); + return -EIO; + } + linebuf[numread] = '\0'; + close(fd); + + value = strtoull(linebuf, &endp, 0); + if (value > 1 || value < 0) + return -EINVAL; + + return value; +} + /* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */ /* diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h index f9373e090637..8cb797bbceb0 100644 --- a/tools/power/cpupower/utils/helpers/sysfs.h +++ b/tools/power/cpupower/utils/helpers/sysfs.h @@ -7,6 +7,8 @@ extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen); +extern int sysfs_is_cpu_online(unsigned int cpu); + extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu, unsigned int idlestate); extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu, diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c index 385ee5c7570c..4eae2c47ba48 100644 --- a/tools/power/cpupower/utils/helpers/topology.c +++ b/tools/power/cpupower/utils/helpers/topology.c @@ -41,6 +41,8 @@ struct cpuid_core_info { unsigned int pkg; unsigned int thread; unsigned int cpu; + /* flags */ + unsigned int is_online:1; }; static int __compare(const void *t1, const void *t2) @@ -78,6 +80,8 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) return -ENOMEM; cpu_top->pkgs = cpu_top->cores = 0; for (cpu = 0; cpu < cpus; cpu++) { + cpu_top->core_info[cpu].cpu = cpu; + cpu_top->core_info[cpu].is_online = sysfs_is_cpu_online(cpu); cpu_top->core_info[cpu].pkg = sysfs_topology_read_file(cpu, "physical_package_id"); if ((int)cpu_top->core_info[cpu].pkg != -1 && @@ -85,7 +89,6 @@ int get_cpu_topology(struct cpupower_topology *cpu_top) cpu_top->pkgs = cpu_top->core_info[cpu].pkg; cpu_top->core_info[cpu].core = sysfs_topology_read_file(cpu, "core_id"); - cpu_top->core_info[cpu].cpu = cpu; } cpu_top->pkgs++; diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index ba4bf068380d..dd8e1ea6e6f2 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -190,9 +190,13 @@ void print_results(int topology_depth, int cpu) } } } - /* cpu offline */ - if (cpu_top.core_info[cpu].pkg == -1 || - cpu_top.core_info[cpu].core == -1) { + /* + * The monitor could still provide useful data, for example + * AMD HW counters partly sit in PCI config space. + * It's up to the monitor plug-in to check .is_online, this one + * is just for additional info. + */ + if (!cpu_top.core_info[cpu].is_online) { printf(_(" *is offline\n")); return; } else -- cgit v1.2.3 From 9ee31f618a3c8209b2bd4bedd71fd5f2be7786bd Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Fri, 12 Aug 2011 01:11:38 +0200 Subject: cpupower: Make monitor command -c/--cpu aware This allows for example: cpupower -c 2-4,6 monitor -m Mperf |Mperf PKG |CORE|CPU | C0 | Cx | Freq 0| 8| 4| 2.42| 97.58| 1353 0| 16| 2| 14.38| 85.62| 1928 0| 24| 6| 1.76| 98.24| 1442 1| 16| 3| 15.53| 84.47| 1650 CPUs always get resorted for package, core then cpu id if it could get read out (or however you name these topology levels...). Still this is a nice way to keep the overview if a test binary is bound to a specific CPU or if one wants to show all CPUs inside a package or similar. Still missing: Do not measure not available cores to reduce the overhead and achieve better results. Signed-off-by: Thomas Renninger Signed-off-by: Dominik Brodowski --- tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index dd8e1ea6e6f2..6cb8d9e6bb6b 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -149,6 +149,10 @@ void print_results(int topology_depth, int cpu) unsigned long long result; cstate_t s; + /* Be careful CPUs may got resorted for pkg value do not just use cpu */ + if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu)) + return; + if (topology_depth > 2) printf("%4d|", cpu_top.core_info[cpu].pkg); if (topology_depth > 1) @@ -389,6 +393,10 @@ int cmd_monitor(int argc, char **argv) return EXIT_FAILURE; } + /* Default is: monitor all CPUs */ + if (bitmask_isallclear(cpus_chosen)) + bitmask_setall(cpus_chosen); + dprint("System has up to %d CPU cores\n", cpu_count); for (num = 0; all_monitors[num]; num++) { -- cgit v1.2.3 From aaa6fd2a004147bf32fce05720938236de3361d9 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Fri, 12 Aug 2011 12:11:33 +0200 Subject: Not all systems expose a firmware or platform mechanism for changing the backlight intensity on i915, so add native driver support. Signed-off-by: Matthew Garrett Cc: Richard Purdie Cc: Chris Wilson Cc: David Airlie Cc: Alex Deucher Cc: Ben Skeggs Cc: Zhang Rui Cc: Len Brown Cc: Jesse Barnes Tested-by: Sedat Dilek Tested-by: Michel Alexandre Salim Tested-by: Kamal Mostafa Signed-off-by: Andrew Morton Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/i915_drv.h | 4 ++ drivers/gpu/drm/i915/intel_dp.c | 7 ++++ drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_lvds.c | 5 +++ drivers/gpu/drm/i915/intel_opregion.c | 1 - drivers/gpu/drm/i915/intel_panel.c | 72 ++++++++++++++++++++++++++++++++++- 6 files changed, 89 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index feb4f164fd1b..7916bd97d5c1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -36,6 +36,7 @@ #include #include #include +#include /* General customization: */ @@ -690,6 +691,7 @@ typedef struct drm_i915_private { int child_dev_num; struct child_device_config *child_dev; struct drm_connector *int_lvds_connector; + struct drm_connector *int_edp_connector; bool mchbar_need_disable; @@ -723,6 +725,8 @@ typedef struct drm_i915_private { /* list of fbdev register on this device */ struct intel_fbdev *fbdev; + struct backlight_device *backlight; + struct drm_property *broadcast_rgb_property; struct drm_property *force_audio_property; diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 0feae908bb37..44fef5e1c490 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1841,6 +1841,11 @@ done: static void intel_dp_destroy (struct drm_connector *connector) { + struct drm_device *dev = connector->dev; + + if (intel_dpd_is_edp(dev)) + intel_panel_destroy_backlight(dev); + drm_sysfs_connector_remove(connector); drm_connector_cleanup(connector); kfree(connector); @@ -2072,6 +2077,8 @@ intel_dp_init(struct drm_device *dev, int output_reg) DRM_MODE_TYPE_PREFERRED; } } + dev_priv->int_edp_connector = connector; + intel_panel_setup_backlight(dev); } intel_dp_add_properties(intel_dp, connector); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7b330e76a435..0b2ee9d39980 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -297,9 +297,10 @@ extern void intel_pch_panel_fitting(struct drm_device *dev, extern u32 intel_panel_get_max_backlight(struct drm_device *dev); extern u32 intel_panel_get_backlight(struct drm_device *dev); extern void intel_panel_set_backlight(struct drm_device *dev, u32 level); -extern void intel_panel_setup_backlight(struct drm_device *dev); +extern int intel_panel_setup_backlight(struct drm_device *dev); extern void intel_panel_enable_backlight(struct drm_device *dev); extern void intel_panel_disable_backlight(struct drm_device *dev); +extern void intel_panel_destroy_backlight(struct drm_device *dev); extern enum drm_connector_status intel_panel_detect(struct drm_device *dev); extern void intel_crtc_load_lut(struct drm_crtc *crtc); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 8b521a289b29..31da77f5c051 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -552,6 +552,8 @@ static void intel_lvds_destroy(struct drm_connector *connector) struct drm_device *dev = connector->dev; struct drm_i915_private *dev_priv = dev->dev_private; + intel_panel_destroy_backlight(dev); + if (dev_priv->lid_notifier.notifier_call) acpi_lid_notifier_unregister(&dev_priv->lid_notifier); drm_sysfs_connector_remove(connector); @@ -1032,6 +1034,9 @@ out: /* keep the LVDS connector */ dev_priv->int_lvds_connector = connector; drm_sysfs_connector_add(connector); + + intel_panel_setup_backlight(dev); + return true; failed: diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index b7c5ddb564d1..b8e8158bb16e 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -227,7 +227,6 @@ void intel_opregion_asle_intr(struct drm_device *dev) asle->aslc = asle_stat; } -/* Only present on Ironlake+ */ void intel_opregion_gse_intr(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; diff --git a/drivers/gpu/drm/i915/intel_panel.c b/drivers/gpu/drm/i915/intel_panel.c index 05f500cd9c24..a9e0c7bcd317 100644 --- a/drivers/gpu/drm/i915/intel_panel.c +++ b/drivers/gpu/drm/i915/intel_panel.c @@ -277,7 +277,7 @@ void intel_panel_enable_backlight(struct drm_device *dev) dev_priv->backlight_enabled = true; } -void intel_panel_setup_backlight(struct drm_device *dev) +static void intel_panel_init_backlight(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -309,3 +309,73 @@ intel_panel_detect(struct drm_device *dev) return connector_status_unknown; } + +#ifdef CONFIG_BACKLIGHT_CLASS_DEVICE +static int intel_panel_update_status(struct backlight_device *bd) +{ + struct drm_device *dev = bl_get_data(bd); + intel_panel_set_backlight(dev, bd->props.brightness); + return 0; +} + +static int intel_panel_get_brightness(struct backlight_device *bd) +{ + struct drm_device *dev = bl_get_data(bd); + return intel_panel_get_backlight(dev); +} + +static const struct backlight_ops intel_panel_bl_ops = { + .update_status = intel_panel_update_status, + .get_brightness = intel_panel_get_brightness, +}; + +int intel_panel_setup_backlight(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct backlight_properties props; + struct drm_connector *connector; + + intel_panel_init_backlight(dev); + + if (dev_priv->int_lvds_connector) + connector = dev_priv->int_lvds_connector; + else if (dev_priv->int_edp_connector) + connector = dev_priv->int_edp_connector; + else + return -ENODEV; + + props.type = BACKLIGHT_RAW; + props.max_brightness = intel_panel_get_max_backlight(dev); + dev_priv->backlight = + backlight_device_register("intel_backlight", + &connector->kdev, dev, + &intel_panel_bl_ops, &props); + + if (IS_ERR(dev_priv->backlight)) { + DRM_ERROR("Failed to register backlight: %ld\n", + PTR_ERR(dev_priv->backlight)); + dev_priv->backlight = NULL; + return -ENODEV; + } + dev_priv->backlight->props.brightness = intel_panel_get_backlight(dev); + return 0; +} + +void intel_panel_destroy_backlight(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + if (dev_priv->backlight) + backlight_device_unregister(dev_priv->backlight); +} +#else +int intel_panel_setup_backlight(struct drm_device *dev) +{ + intel_panel_init_backlight(dev); + return 0; +} + +void intel_panel_destroy_backlight(struct drm_device *dev) +{ + return; +} +#endif -- cgit v1.2.3 From c3613de92ebea302137d21d8938421c3f88d8741 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 12 Aug 2011 17:05:54 -0700 Subject: drm/i915: Can't do accurate vblank timestamps with UMS Disable this feature when KMS is not running by setting the driver->get_vblank_timestamp function pointer to NULL. Signed-off-by: Keith Packard Tested-by: Justin P. Mattock --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 02f96fd0d52d..9cbb0cd8f46a 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2058,8 +2058,10 @@ void intel_irq_init(struct drm_device *dev) dev->driver->get_vblank_counter = gm45_get_vblank_counter; } - - dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; + if (drm_core_check_feature(dev, DRIVER_MODESET)) + dev->driver->get_vblank_timestamp = i915_get_vblank_timestamp; + else + dev->driver->get_vblank_timestamp = NULL; dev->driver->get_scanout_position = i915_get_crtc_scanoutpos; if (IS_IVYBRIDGE(dev)) { -- cgit v1.2.3 From 92b79f4322b8a2506bdd862f554a2a81ff0a2dad Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 12 Aug 2011 17:07:18 -0700 Subject: drm/i915: Cannot set clock gating under UMS The clock gating functions are only assigned under KMS, so don't try to call them under UMS. Signed-off-by: Keith Packard Tested-by: Justin P. Mattock --- drivers/gpu/drm/i915/i915_suspend.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 87677d60d0df..f10742359ec9 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -871,7 +871,8 @@ int i915_restore_state(struct drm_device *dev) } mutex_unlock(&dev->struct_mutex); - intel_init_clock_gating(dev); + if (drm_core_check_feature(dev, DRIVER_MODESET)) + intel_init_clock_gating(dev); if (IS_IRONLAKE_M(dev)) { ironlake_enable_drps(dev); -- cgit v1.2.3 From 4853abaae7e4a2af938115ce9071ef8684fb7af4 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Mon, 15 Aug 2011 21:37:25 +0200 Subject: block: fix flush machinery for stacking drivers with differring flush flags Commit ae1b1539622fb46e51b4d13b3f9e5f4c713f86ae, block: reimplement FLUSH/FUA to support merge, introduced a performance regression when running any sort of fsyncing workload using dm-multipath and certain storage (in our case, an HP EVA). The test I ran was fs_mark, and it dropped from ~800 files/sec on ext4 to ~100 files/sec. It turns out that dm-multipath always advertised flush+fua support, and passed commands on down the stack, where those flags used to get stripped off. The above commit changed that behavior: static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; while (1) { - while (!list_empty(&q->queue_head)) { + if (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || - (rq->cmd_flags & REQ_FLUSH_SEQ)) - return rq; - rq = blk_do_flush(q, rq); - if (rq) - return rq; + return rq; } Note that previously, a command would come in here, have REQ_FLUSH|REQ_FUA set, and then get handed off to blk_do_flush: struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned int fflags = q->flush_flags; /* may change, cache it */ bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; ... if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { rq->cmd_flags &= ~REQ_FLUSH; if (!has_fua) rq->cmd_flags &= ~REQ_FUA; return rq; } So, the flush machinery was bypassed in such cases (q->flush_flags == 0 && rq->cmd_flags & (REQ_FLUSH|REQ_FUA)). Now, however, we don't get into the flush machinery at all. Instead, __elv_next_request just hands a request with flush and fua bits set to the scsi_request_fn, even if the underlying request_queue does not support flush or fua. The agreed upon approach is to fix the flush machinery to allow stacking. While this isn't used in practice (since there is only one request-based dm target, and that target will now reflect the flush flags of the underlying device), it does future-proof the solution, and make it function as designed. In order to make this work, I had to add a field to the struct request, inside the flush structure (to store the original req->end_io). Shaohua had suggested overloading the union with rb_node and completion_data, but the completion data is used by device mapper and can also be used by other drivers. So, I didn't see a way around the additional field. I tested this patch on an HP EVA with both ext4 and xfs, and it recovers the lost performance. Comments and other testers, as always, are appreciated. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 8 ++++++-- block/blk-flush.c | 20 ++++++++++++++++---- block/blk.h | 2 ++ include/linux/blkdev.h | 1 + 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index b850bedad229..7c59b0f5eae8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1700,6 +1700,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); int blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; + int where = ELEVATOR_INSERT_BACK; if (blk_rq_check_limits(q, rq)) return -EIO; @@ -1716,7 +1717,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - add_acct_request(q, rq, ELEVATOR_INSERT_BACK); + if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + where = ELEVATOR_INSERT_FLUSH; + + add_acct_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2273,7 +2277,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) diff --git a/block/blk-flush.c b/block/blk-flush.c index 2d162bd840d3..491eb30a242d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -123,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; - rq->end_io = NULL; + rq->end_io = rq->flush.saved_end_io; } /** @@ -301,9 +301,6 @@ void blk_insert_flush(struct request *rq) unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); - BUG_ON(rq->end_io); - BUG_ON(!rq->bio || rq->bio != rq->biotail); - /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. @@ -312,6 +309,19 @@ void blk_insert_flush(struct request *rq) if (!(fflags & REQ_FUA)) rq->cmd_flags &= ~REQ_FUA; + /* + * An empty flush handed down from a stacking driver may + * translate into nothing if the underlying device does not + * advertise a write-back cache. In this case, simply + * complete the request. + */ + if (!policy) { + __blk_end_bidi_request(rq, 0, 0, 0); + return; + } + + BUG_ON(!rq->bio || rq->bio != rq->biotail); + /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue @@ -320,6 +330,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); + blk_run_queue_async(q); return; } @@ -330,6 +341,7 @@ void blk_insert_flush(struct request *rq) memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = flush_data_end_io; blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); diff --git a/block/blk.h b/block/blk.h index d6586287adc9..20b900a377c9 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 847928546076..84b15d54f8c2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,6 +118,7 @@ struct request { struct { unsigned int seq; struct list_head list; + rq_end_io_fn *saved_end_io; } flush; }; -- cgit v1.2.3 From 795858dbd253462a67e14272edeaae73c6074b17 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Mon, 15 Aug 2011 13:02:37 -0700 Subject: ceph: fix encoding of ino only (not relative) paths A 'path' consists of a starting ino and relative component. Encode even when there is no relative component. This is primarily needed by the NFS reexport code. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index fee028b5332e..86c59e16ba74 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1595,7 +1595,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath); dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, *ppath); - } else if (rpath) { + } else if (rpath || rino) { *ino = rino; *ppath = rpath; *pathlen = strlen(rpath); -- cgit v1.2.3 From a0fba3eb059e73fed2d376a901f8117734c12f1f Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Mon, 15 Aug 2011 10:10:31 +0000 Subject: sparc64: remove unnecessary macros from spinlock_64.h The sparc64 spinlock_64.h contains a number of operations defined first as static inline functions, and then as macros with the same names and parameters as the functions. Maybe this was needed at some point in the past, but now nothing seems to depend on these macros (checked with a recursive grep looking for ifdefs on these names). Other archs don't define these identity-macros. So this patch deletes these unnecessary macros. Compile-tested with sparc64_defconfig. Signed-off-by: Mikael Pettersson Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_64.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 073936a8b275..968917694978 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -210,14 +210,8 @@ static int inline arch_write_trylock(arch_rwlock_t *lock) return result; } -#define arch_read_lock(p) arch_read_lock(p) #define arch_read_lock_flags(p, f) arch_read_lock(p) -#define arch_read_trylock(p) arch_read_trylock(p) -#define arch_read_unlock(p) arch_read_unlock(p) -#define arch_write_lock(p) arch_write_lock(p) #define arch_write_lock_flags(p, f) arch_write_lock(p) -#define arch_write_unlock(p) arch_write_unlock(p) -#define arch_write_trylock(p) arch_write_trylock(p) #define arch_read_can_lock(rw) (!((rw)->lock & 0x80000000UL)) #define arch_write_can_lock(rw) (!(rw)->lock) -- cgit v1.2.3 From 3f6aa0b113846a8628baa649af422cfc6fb1d786 Mon Sep 17 00:00:00 2001 From: Mikael Pettersson Date: Mon, 15 Aug 2011 10:11:50 +0000 Subject: sparc32: unbreak arch_write_unlock() The sparc32 version of arch_write_unlock() is just a plain assignment. Unfortunately this allows the compiler to schedule side-effects in a protected region to occur after the HW-level unlock, which is broken. E.g., the following trivial test case gets miscompiled: #include rwlock_t lock; int counter; void foo(void) { write_lock(&lock); ++counter; write_unlock(&lock); } Fixed by adding a compiler memory barrier to arch_write_unlock(). The sparc64 version combines the barrier and assignment into a single asm(), and implements the operation as a static inline, so that's what I did too. Compile-tested with sparc32_defconfig + CONFIG_SMP=y. Signed-off-by: Mikael Pettersson Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_32.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index 5f5b8bf3f50d..bcc98fc35281 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -131,6 +131,15 @@ static inline void arch_write_lock(arch_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } +static void inline arch_write_unlock(arch_rwlock_t *lock) +{ + __asm__ __volatile__( +" st %%g0, [%0]" + : /* no outputs */ + : "r" (lock) + : "memory"); +} + static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int val; @@ -175,8 +184,6 @@ static inline int __arch_read_trylock(arch_rwlock_t *rw) res; \ }) -#define arch_write_unlock(rw) do { (rw)->lock = 0; } while(0) - #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_read_lock_flags(rw, flags) arch_read_lock(rw) #define arch_write_lock_flags(rw, flags) arch_write_lock(rw) -- cgit v1.2.3 From 178a29600340bef5b13cd4157053679debe35351 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2011 14:45:17 -0700 Subject: sparc64: Set HAVE_C_RECORDMCOUNT Signed-off-by: David S. Miller --- arch/sparc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 42c67beadcae..1a6f20d4e7e6 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -55,6 +55,7 @@ config SPARC64 select PERF_USE_VMALLOC select IRQ_PREFLOW_FASTEOI select ARCH_HAVE_NMI_SAFE_CMPXCHG + select HAVE_C_RECORDMCOUNT config ARCH_DEFCONFIG string -- cgit v1.2.3 From cedf03bd9aa54d1d7a9065dddc9e76505f476b12 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Aug 2011 10:18:46 -0700 Subject: x86: fix mm/fault.c build arch/x86/mm/fault.c needs to include asm/vsyscall.h to fix a build error: arch/x86/mm/fault.c: In function '__bad_area_nosemaphore': arch/x86/mm/fault.c:728: error: 'VSYSCALL_START' undeclared (first use in this function) Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 247aae3dc008..0d17c8c50acd 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -17,6 +17,7 @@ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... */ #include /* kmemcheck_*(), ... */ +#include /* * Page fault error code bits: -- cgit v1.2.3 From b5ddbf465f3675b19c8f5528b4064cbf278a5c6f Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 16 Aug 2011 09:36:06 +1000 Subject: regmap: using module facilities requires module.h Commit b33f9cbd67ba ("regmap: Specify a module license") added a MODULES_LICENSE to this file without adding an include of module.h. module.h should have been included anyway, since this file has EXPORT_SYMBOLs as well. With the pending module.h split up, this would probably have caused build problems. Cc: Stephen Warren Cc: Mark Brown Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- drivers/base/regmap/regmap-spi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/regmap/regmap-spi.c b/drivers/base/regmap/regmap-spi.c index 2bbc65999a5f..f8396945d6ed 100644 --- a/drivers/base/regmap/regmap-spi.c +++ b/drivers/base/regmap/regmap-spi.c @@ -13,6 +13,7 @@ #include #include #include +#include static int regmap_spi_write(struct device *dev, const void *data, size_t count) { -- cgit v1.2.3 From 18adad1c57f820d38d05e3d5e3d548e286233b76 Mon Sep 17 00:00:00 2001 From: Gerard Braad Date: Tue, 16 Aug 2011 00:17:56 -0700 Subject: Input: wacom - add support for the Wacom Bamboo Pen (CTL-660/K) Signed-off-by: Gerard Braad Reviewed-by: Chris Bagwell Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_wac.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 03ebcc8b24b5..c1c2f7b28d89 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -1460,6 +1460,9 @@ static const struct wacom_features wacom_features_0xD3 = static const struct wacom_features wacom_features_0xD4 = { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0xD5 = + { "Wacom Bamboo Pen 6x8", WACOM_PKGLEN_BBFUN, 21648, 13530, 1023, + 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD6 = { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1564,6 +1567,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xD2) }, { USB_DEVICE_WACOM(0xD3) }, { USB_DEVICE_WACOM(0xD4) }, + { USB_DEVICE_WACOM(0xD5) }, { USB_DEVICE_WACOM(0xD6) }, { USB_DEVICE_WACOM(0xD7) }, { USB_DEVICE_WACOM(0xD8) }, -- cgit v1.2.3 From a417ea4432db7fd1c91c19b129a3e3d2367b7ce4 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Aug 2011 00:17:56 -0700 Subject: Input: wacom - add WAC_MSG_RETRIES define Use WAC_MSG_RETRIES define instead of a numeric constant. Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_sys.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 449c0a46dbac..9879c73ee517 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -49,6 +49,7 @@ struct hid_descriptor { #define USB_REQ_GET_REPORT 0x01 #define USB_REQ_SET_REPORT 0x09 #define WAC_HID_FEATURE_REPORT 0x03 +#define WAC_MSG_RETRIES 5 static int usb_get_report(struct usb_interface *intf, unsigned char type, unsigned char id, void *buf, int size) @@ -165,7 +166,7 @@ static int wacom_parse_hid(struct usb_interface *intf, struct hid_descriptor *hi report, hid_desc->wDescriptorLength, 5000); /* 5 secs */ - } while (result < 0 && limit++ < 5); + } while (result < 0 && limit++ < WAC_MSG_RETRIES); /* No need to parse the Descriptor. It isn't an error though */ if (result < 0) @@ -336,7 +337,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, rep_data, 3); - } while ((error < 0 || rep_data[1] != 4) && limit++ < 5); + } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES); } else if (features->type != TABLETPC) { do { rep_data[0] = 2; @@ -347,7 +348,7 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, rep_data, 2); - } while ((error < 0 || rep_data[1] != 2) && limit++ < 5); + } while ((error < 0 || rep_data[1] != 2) && limit++ < WAC_MSG_RETRIES); } kfree(rep_data); -- cgit v1.2.3 From 3b48c91cdf2d6827ce315b3b112310fa02198db0 Mon Sep 17 00:00:00 2001 From: Ping Cheng Date: Tue, 16 Aug 2011 00:17:57 -0700 Subject: Input: wacom - report id 3 returns 4 bytes of data Signed-off-by: Ping Cheng Signed-off-by: Dmitry Torokhov --- drivers/input/tablet/wacom_sys.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/input/tablet/wacom_sys.c b/drivers/input/tablet/wacom_sys.c index 9879c73ee517..d27c9d91630b 100644 --- a/drivers/input/tablet/wacom_sys.c +++ b/drivers/input/tablet/wacom_sys.c @@ -320,23 +320,25 @@ static int wacom_query_tablet_data(struct usb_interface *intf, struct wacom_feat int limit = 0, report_id = 2; int error = -ENOMEM; - rep_data = kmalloc(2, GFP_KERNEL); + rep_data = kmalloc(4, GFP_KERNEL); if (!rep_data) return error; - /* ask to report tablet data if it is 2FGT Tablet PC or + /* ask to report tablet data if it is MT Tablet PC or * not a Tablet PC */ if (features->type == TABLETPC2FG) { do { rep_data[0] = 3; rep_data[1] = 4; + rep_data[2] = 0; + rep_data[3] = 0; report_id = 3; error = usb_set_report(intf, WAC_HID_FEATURE_REPORT, - report_id, rep_data, 2); + report_id, rep_data, 4); if (error >= 0) error = usb_get_report(intf, WAC_HID_FEATURE_REPORT, report_id, - rep_data, 3); + rep_data, 4); } while ((error < 0 || rep_data[1] != 4) && limit++ < WAC_MSG_RETRIES); } else if (features->type != TABLETPC) { do { -- cgit v1.2.3 From c503ad466da44ca23c658986629bf7a2e2eabbb7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Aug 2011 14:23:20 +0200 Subject: ALSA: hda - Fix duplicated capture-volume creation for ALC268 models Fix the duplicated creation of capture-mixer elements for some static ALC268 configurations. The capture mixers must be put to cap_mixer field instead of mixers array. Signed-off-by: Takashi Iwai --- sound/pci/hda/alc268_quirks.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sound/pci/hda/alc268_quirks.c b/sound/pci/hda/alc268_quirks.c index be58bf2f3aec..2e5876ce71fe 100644 --- a/sound/pci/hda/alc268_quirks.c +++ b/sound/pci/hda/alc268_quirks.c @@ -476,8 +476,8 @@ static const struct snd_pci_quirk alc268_ssid_cfg_tbl[] = { static const struct alc_config_preset alc268_presets[] = { [ALC267_QUANTA_IL1] = { - .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc267_quanta_il1_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc267_quanta_il1_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -492,8 +492,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_3ST] = { - .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_base_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), .dac_nids = alc268_dac_nids, @@ -507,8 +507,8 @@ static const struct alc_config_preset alc268_presets[] = { .input_mux = &alc268_capture_source, }, [ALC268_TOSHIBA] = { - .mixers = { alc268_toshiba_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_toshiba_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_toshiba_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -525,8 +525,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER] = { - .mixers = { alc268_acer_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_acer_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -543,8 +543,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER_DMIC] = { - .mixers = { alc268_acer_dmic_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_acer_dmic_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -561,9 +561,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ACER_ASPIRE_ONE] = { - .mixers = { alc268_acer_aspire_one_mixer, - alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc268_acer_aspire_one_mixer, alc268_beep_mixer}, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_acer_aspire_one_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -579,8 +578,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_DELL] = { - .mixers = { alc268_dell_mixer, alc268_beep_mixer, - alc268_capture_nosrc_mixer }, + .mixers = { alc268_dell_mixer, alc268_beep_mixer}, + .cap_mixer = alc268_capture_nosrc_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_dell_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -596,8 +595,8 @@ static const struct alc_config_preset alc268_presets[] = { .init_hook = alc_inithook, }, [ALC268_ZEPTO] = { - .mixers = { alc268_base_mixer, alc268_capture_alt_mixer, - alc268_beep_mixer }, + .mixers = { alc268_base_mixer, alc268_beep_mixer }, + .cap_mixer = alc268_capture_alt_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_toshiba_verbs }, .num_dacs = ARRAY_SIZE(alc268_dac_nids), @@ -616,7 +615,8 @@ static const struct alc_config_preset alc268_presets[] = { }, #ifdef CONFIG_SND_DEBUG [ALC268_TEST] = { - .mixers = { alc268_test_mixer, alc268_capture_mixer }, + .mixers = { alc268_test_mixer }, + .cap_mixer = alc268_capture_mixer, .init_verbs = { alc268_base_init_verbs, alc268_eapd_verbs, alc268_volume_init_verbs, alc268_beep_init_verbs }, -- cgit v1.2.3 From fa71f447065f676157ba6a2c121ba419818fc559 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 8 Aug 2011 11:50:24 -0400 Subject: cifs: demote cERROR in build_path_from_dentry to cFYI Running the cthon tests on a recent kernel caused this message to pop occasionally: CIFS VFS: did not end path lookup where expected namelen is 0 Some added debugging showed that namelen and dfsplen were both 0 when this occurred. That means that the read_seqretry returned true. Assuming that the comment inside the if statement is true, this should be harmless and just means that we raced with a rename. If that is the case, then there's no need for alarm and we can demote this to cFYI. While we're at it, print the dfsplen too so that we can see what happened here if the message pops during debugging. Cc: stable@kernel.org Cc: Al Viro Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ae576fbb5142..72d448bf96ce 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -105,8 +105,8 @@ cifs_bp_rename_retry: } rcu_read_unlock(); if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) { - cERROR(1, "did not end path lookup where expected namelen is %d", - namelen); + cFYI(1, "did not end path lookup where expected. namelen=%d " + "dfsplen=%d", namelen, dfsplen); /* presumably this is only possible if racing with a rename of one of the parent directories (we can not lock the dentries above us to prevent this, but retrying should be harmless) */ -- cgit v1.2.3 From c3585aa91a25264234c8bd27a4a6823d4e544c2a Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Tue, 16 Aug 2011 14:18:48 +0100 Subject: gma500: kill MIPI interface types Kirill Shutemov found problems with the non-upstream IMG driver where the use of extra DRM encoder/connector types caused random crashes when the DRM layer tried to display their matching name. This removes the MIPI types matching the changes Pauli Nieminen made to the non upstream driver set. As Pauli points out: " MIPI (or DSI) is protocol specification on top of LVDS serial bus. That makes it resonable to call MIPI connectors and encoders LVDS." (and indeed they may also be HDMI convertors or similar when we want to report a more useful to end user result) Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/staging/gma500/mdfld_dsi_dbi.c | 3 ++- drivers/staging/gma500/mdfld_dsi_dbi.h | 3 --- drivers/staging/gma500/mdfld_dsi_dpi.c | 7 ++++++- drivers/staging/gma500/mdfld_dsi_output.c | 4 +++- drivers/staging/gma500/medfield.h | 2 -- drivers/staging/gma500/psb_drv.h | 1 - 6 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.c b/drivers/staging/gma500/mdfld_dsi_dbi.c index 02e17c9c8637..fd211f3467c4 100644 --- a/drivers/staging/gma500/mdfld_dsi_dbi.c +++ b/drivers/staging/gma500/mdfld_dsi_dbi.c @@ -711,10 +711,11 @@ struct mdfld_dsi_encoder *mdfld_dsi_dbi_init(struct drm_device *dev, /* Create drm encoder object */ connector = &dsi_connector->base.base; encoder = &dbi_output->base.base; + /* Review this if we ever get MIPI-HDMI bridges or similar */ drm_encoder_init(dev, encoder, p_funcs->encoder_funcs, - DRM_MODE_ENCODER_MIPI); + DRM_MODE_ENCODER_LVDS); drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); /* Attach to given connector */ diff --git a/drivers/staging/gma500/mdfld_dsi_dbi.h b/drivers/staging/gma500/mdfld_dsi_dbi.h index dc6242c51d0b..f0fa986fd934 100644 --- a/drivers/staging/gma500/mdfld_dsi_dbi.h +++ b/drivers/staging/gma500/mdfld_dsi_dbi.h @@ -42,9 +42,6 @@ #include "mdfld_dsi_output.h" #include "mdfld_output.h" -#define DRM_MODE_ENCODER_MIPI 5 - - /* * DBI encoder which inherits from mdfld_dsi_encoder */ diff --git a/drivers/staging/gma500/mdfld_dsi_dpi.c b/drivers/staging/gma500/mdfld_dsi_dpi.c index 6e03a91e947e..e685f1217baa 100644 --- a/drivers/staging/gma500/mdfld_dsi_dpi.c +++ b/drivers/staging/gma500/mdfld_dsi_dpi.c @@ -777,10 +777,15 @@ struct mdfld_dsi_encoder *mdfld_dsi_dpi_init(struct drm_device *dev, /* Create drm encoder object */ connector = &dsi_connector->base.base; encoder = &dpi_output->base.base; + /* + * On existing hardware this will be a panel of some form, + * if future devices also have HDMI bridges this will need + * revisiting + */ drm_encoder_init(dev, encoder, p_funcs->encoder_funcs, - DRM_MODE_ENCODER_MIPI); + DRM_MODE_ENCODER_LVDS); drm_encoder_helper_add(encoder, p_funcs->encoder_helper_funcs); diff --git a/drivers/staging/gma500/mdfld_dsi_output.c b/drivers/staging/gma500/mdfld_dsi_output.c index 7536095c30a0..9050c0f78b15 100644 --- a/drivers/staging/gma500/mdfld_dsi_output.c +++ b/drivers/staging/gma500/mdfld_dsi_output.c @@ -955,7 +955,9 @@ void mdfld_dsi_output_init(struct drm_device *dev, psb_output->type = (pipe == 0) ? INTEL_OUTPUT_MIPI : INTEL_OUTPUT_MIPI2; connector = &psb_output->base; - drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, DRM_MODE_CONNECTOR_MIPI); + /* Revisit type if MIPI/HDMI bridges ever appear on Medfield */ + drm_connector_init(dev, connector, &mdfld_dsi_connector_funcs, + DRM_MODE_CONNECTOR_LVDS); drm_connector_helper_add(connector, &mdfld_dsi_connector_helper_funcs); connector->display_info.subpixel_order = SubPixelHorizontalRGB; diff --git a/drivers/staging/gma500/medfield.h b/drivers/staging/gma500/medfield.h index 38165e8367e5..09e9687431f1 100644 --- a/drivers/staging/gma500/medfield.h +++ b/drivers/staging/gma500/medfield.h @@ -21,8 +21,6 @@ * DEALINGS IN THE SOFTWARE. */ -#define DRM_MODE_ENCODER_MIPI 5 - /* Medfield DSI controller registers */ #define MIPIA_DEVICE_READY_REG 0xb000 diff --git a/drivers/staging/gma500/psb_drv.h b/drivers/staging/gma500/psb_drv.h index 72f487a2a1b7..fd4732dd783a 100644 --- a/drivers/staging/gma500/psb_drv.h +++ b/drivers/staging/gma500/psb_drv.h @@ -35,7 +35,6 @@ /* Append new drm mode definition here, align with libdrm definition */ #define DRM_MODE_SCALE_NO_SCALE 2 -#define DRM_MODE_CONNECTOR_MIPI 15 enum { CHIP_PSB_8108 = 0, /* Poulsbo */ -- cgit v1.2.3 From 4fec0e0bde09095b6349dc6206dbf19cebcd0a7e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 15 Aug 2011 21:41:43 -0700 Subject: xen: self-balloon needs module.h Fix build errors (found when CONFIG_SYSFS is not enabled): drivers/xen/xen-selfballoon.c:446: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:446: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' drivers/xen/xen-selfballoon.c:446: warning: parameter names (without types) in function declaration drivers/xen/xen-selfballoon.c:485: error: expected declaration specifiers or '...' before string constant drivers/xen/xen-selfballoon.c:485: warning: data definition has no type or storage class drivers/xen/xen-selfballoon.c:485: warning: type defaults to 'int' in declaration of 'MODULE_LICENSE' drivers/xen/xen-selfballoon.c:485: warning: function declaration isn't a prototype Signed-off-by: Randy Dunlap Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Linus Torvalds --- drivers/xen/xen-selfballoon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index 1b4afd81f872..6ea852e25162 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From df3d8ae1f8780166a16dd7d08b4842a4d5b5f2b4 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 2 Aug 2011 12:54:31 -0700 Subject: KVM: uses TASKSTATS, depends on NET CONFIG_TASKSTATS just had a change to use netlink, including a change to "depends on NET". Since "select" does not follow dependencies, KVM also needs to depend on NET to prevent build errors when CONFIG_NET is not enabled. Sample of the reported "undefined reference" build errors: taskstats.c:(.text+0x8f686): undefined reference to `nla_put' taskstats.c:(.text+0x8f721): undefined reference to `nla_reserve' taskstats.c:(.text+0x8f8fb): undefined reference to `init_net' taskstats.c:(.text+0x8f905): undefined reference to `netlink_unicast' taskstats.c:(.text+0x8f934): undefined reference to `kfree_skb' taskstats.c:(.text+0x8f9e9): undefined reference to `skb_clone' taskstats.c:(.text+0x90060): undefined reference to `__alloc_skb' taskstats.c:(.text+0x901e9): undefined reference to `skb_put' taskstats.c:(.init.text+0x4665): undefined reference to `genl_register_family' taskstats.c:(.init.text+0x4699): undefined reference to `genl_register_ops' taskstats.c:(.init.text+0x4710): undefined reference to `genl_unregister_ops' taskstats.c:(.init.text+0x471c): undefined reference to `genl_unregister_family' Signed-off-by: Randy Dunlap Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 0a09b58bb1cb..ff5790d8e990 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -22,6 +22,8 @@ config KVM depends on HAVE_KVM # for device assignment: depends on PCI + # for TASKSTATS/TASK_DELAY_ACCT: + depends on NET select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES -- cgit v1.2.3 From 22cfb0bf6721bb1f865f67bc21e3c36c272faf36 Mon Sep 17 00:00:00 2001 From: Bernd Schubert Date: Tue, 16 Aug 2011 10:56:54 +0000 Subject: IPoIB: Fix possible NULL dereference in ipoib_start_xmit() Fix a bug introduced in 69cce1d14049 ("net: Abstract dst->neighbour accesses behind helpers.") where we might dereference skb_dst(skb) even if it is NULL, which causes: [ 240.944030] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 [ 240.948007] IP: [] ipoib_start_xmit+0x39/0x280 [ib_ipoib] [...] [ 240.948007] Call Trace: [ 240.948007] [ 240.948007] [] dev_hard_start_xmit+0x2a0/0x590 [ 240.948007] [] ? arp_create+0x70/0x200 [ 240.948007] [] sch_direct_xmit+0xef/0x1c0 Addresses: https://bugzilla.kernel.org/show_bug.cgi?id=41212 Signed-off-by: Bernd Schubert Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 43f89ba0a908..fe89c4660d55 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -717,11 +717,13 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - struct neighbour *n; + struct neighbour *n = NULL; unsigned long flags; - n = dst_get_neighbour(skb_dst(skb)); - if (likely(skb_dst(skb) && n)) { + if (likely(skb_dst(skb))) + n = dst_get_neighbour(skb_dst(skb)); + + if (likely(n)) { if (unlikely(!*to_ipoib_neigh(n))) { ipoib_path_lookup(skb, dev); return NETDEV_TX_OK; -- cgit v1.2.3 From 48df4a6fd8c40c0bbcbca2044f5f2bc75dcf6db1 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 12 Aug 2011 10:23:01 -0700 Subject: xhci: Handle zero-length isochronous packets. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a long time, the xHCI driver has had this note: /* FIXME: Ignoring zero-length packets, can those happen? */ It turns out that, yes, there are drivers that need to queue zero-length transfers for isochronous OUT transfers. Without this patch, users will see kernel hang messages when a driver attempts to enqueue an isochronous URB with a zero length transfer (because count_isoc_trbs_needed will return zero for that TD, xhci_td->last_trb will never be set, and updating the dequeue pointer will cause an infinite loop). Matěj ran into this issue when using an NI Audio4DJ USB soundcard with the snd-usb-caiaq driver. See https://bugzilla.kernel.org/show_bug.cgi?id=40702 Fix count_isoc_trbs_needed() to return 1 for zero-length transfers (thanks Alan on the math help). Update the various TRB field calculations to deal with zero-length transfers. We're still transferring one packet with a zero-length data payload, so the total_packet_count should be 1. The Transfer Burst Count (TBC) and Transfer Last Burst Packet Count (TLBPC) fields should be set to zero. This patch should be backported to kernels as old as 2.6.36. Signed-off-by: Sarah Sharp Tested-by: Matěj Laitl Cc: Daniel Mack Cc: Alan Stern Cc: stable@kernel.org --- drivers/usb/host/xhci-ring.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b2d654b7477e..54139a2f06ce 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2684,6 +2684,10 @@ static u32 xhci_v1_0_td_remainder(int running_total, int trb_buff_len, { int packets_transferred; + /* One TRB with a zero-length data packet. */ + if (running_total == 0 && trb_buff_len == 0) + return 0; + /* All the TRB queueing functions don't count the current TRB in * running_total. */ @@ -3125,20 +3129,15 @@ static int count_isoc_trbs_needed(struct xhci_hcd *xhci, struct urb *urb, int i) { int num_trbs = 0; - u64 addr, td_len, running_total; + u64 addr, td_len; addr = (u64) (urb->transfer_dma + urb->iso_frame_desc[i].offset); td_len = urb->iso_frame_desc[i].length; - running_total = TRB_MAX_BUFF_SIZE - (addr & (TRB_MAX_BUFF_SIZE - 1)); - running_total &= TRB_MAX_BUFF_SIZE - 1; - if (running_total != 0) - num_trbs++; - - while (running_total < td_len) { + num_trbs = DIV_ROUND_UP(td_len + (addr & (TRB_MAX_BUFF_SIZE - 1)), + TRB_MAX_BUFF_SIZE); + if (num_trbs == 0) num_trbs++; - running_total += TRB_MAX_BUFF_SIZE; - } return num_trbs; } @@ -3250,9 +3249,11 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, addr = start_addr + urb->iso_frame_desc[i].offset; td_len = urb->iso_frame_desc[i].length; td_remain_len = td_len; - /* FIXME: Ignoring zero-length packets, can those happen? */ total_packet_count = roundup(td_len, le16_to_cpu(urb->ep->desc.wMaxPacketSize)); + /* A zero-length transfer still involves at least one packet. */ + if (total_packet_count == 0) + total_packet_count++; burst_count = xhci_get_burst_count(xhci, urb->dev, urb, total_packet_count); residue = xhci_get_last_burst_packet_count(xhci, -- cgit v1.2.3 From eb39d34004888afcc0a44d9c36383cd69fa3b3b9 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 26 Jul 2011 16:59:00 -0700 Subject: target: Change TCM_NON_EXISTENT_LUN response to ASC=LOGICAL UNIT NOT SUPPORTED This patch changes transport_send_check_condition_and_sense() for TCM_NON_EXISTENT_LUN emulation to use 0x25 (LOGICAL UNIT NOT SUPPORTED) instead of the original 0x20 (INVALID COMMAND OPERATION CODE). This is helpful to distinguish between TCM_UNSUPPORTED_SCSI_OPCODE ASC=0x20 exceptions. Signed-off-by: Nicholas A. Bellinger --- drivers/target/target_core_transport.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 89760329d5d0..cc5a339d4d5a 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4726,6 +4726,13 @@ int transport_send_check_condition_and_sense( */ switch (reason) { case TCM_NON_EXISTENT_LUN: + /* CURRENT ERROR */ + buffer[offset] = 0x70; + /* ILLEGAL REQUEST */ + buffer[offset+SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL UNIT NOT SUPPORTED */ + buffer[offset+SPC_ASC_KEY_OFFSET] = 0x25; + break; case TCM_UNSUPPORTED_SCSI_OPCODE: case TCM_SECTOR_COUNT_TOO_MANY: /* CURRENT ERROR */ -- cgit v1.2.3 From d5e2003c2bcda93a8f2e668eb4642d70c9c38301 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 4 Aug 2011 14:52:27 +0000 Subject: Btrfs: detect wether a device supports discard We have a problem where if a user specifies discard but doesn't actually support it we will return EOPNOTSUPP from btrfs_discard_extent. This is a problem because this gets called (in a fashion) from the tree log recovery code, which has a nice little BUG_ON(ret) after it, which causes us to fail the tree log replay. So instead detect wether our devices support discard when we're adding them and then don't issue discards if we know that the device doesn't support it. And just for good measure set ret = 0 in btrfs_issue_discard just in case we still get EOPNOTSUPP so we don't screw anybody up like this again. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 12 ++++++++++-- fs/btrfs/volumes.c | 17 +++++++++++++++++ fs/btrfs/volumes.h | 2 ++ 3 files changed, 29 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 66bac226944e..059dfa048cc0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1782,6 +1782,9 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, for (i = 0; i < multi->num_stripes; i++, stripe++) { + if (!stripe->dev->can_discard) + continue; + ret = btrfs_issue_discard(stripe->dev->bdev, stripe->physical, stripe->length); @@ -1789,11 +1792,16 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, discarded_bytes += stripe->length; else if (ret != -EOPNOTSUPP) break; + + /* + * Just in case we get back EOPNOTSUPP for some reason, + * just ignore the return value so we don't screw up + * people calling discard_extent. + */ + ret = 0; } kfree(multi); } - if (discarded_bytes && ret == -EOPNOTSUPP) - ret = 0; if (actual_bytes) *actual_bytes = discarded_bytes; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3c5f2fcd82c1..a595f8775c37 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -517,6 +517,9 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) fs_devices->rw_devices--; } + if (device->can_discard) + fs_devices->num_can_discard--; + new_device = kmalloc(sizeof(*new_device), GFP_NOFS); BUG_ON(!new_device); memcpy(new_device, device, sizeof(*new_device)); @@ -525,6 +528,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) new_device->bdev = NULL; new_device->writeable = 0; new_device->in_fs_metadata = 0; + new_device->can_discard = 0; list_replace_rcu(&device->dev_list, &new_device->dev_list); call_rcu(&device->rcu, free_device); @@ -564,6 +568,7 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fmode_t flags, void *holder) { + struct request_queue *q; struct block_device *bdev; struct list_head *head = &fs_devices->devices; struct btrfs_device *device; @@ -620,6 +625,12 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, seeding = 0; } + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) { + device->can_discard = 1; + fs_devices->num_can_discard++; + } + device->bdev = bdev; device->in_fs_metadata = 0; device->mode = flags; @@ -1560,6 +1571,7 @@ error: int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { + struct request_queue *q; struct btrfs_trans_handle *trans; struct btrfs_device *device; struct block_device *bdev; @@ -1629,6 +1641,9 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) lock_chunks(root); + q = bdev_get_queue(bdev); + if (blk_queue_discard(q)) + device->can_discard = 1; device->writeable = 1; device->work.func = pending_bios_fn; generate_random_uuid(device->uuid); @@ -1664,6 +1679,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) root->fs_info->fs_devices->num_devices++; root->fs_info->fs_devices->open_devices++; root->fs_info->fs_devices->rw_devices++; + if (device->can_discard) + root->fs_info->fs_devices->num_can_discard++; root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; if (!blk_queue_nonrot(bdev_get_queue(bdev))) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 7c12d61ae7ae..6d866db4e177 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -48,6 +48,7 @@ struct btrfs_device { int writeable; int in_fs_metadata; int missing; + int can_discard; spinlock_t io_lock; @@ -104,6 +105,7 @@ struct btrfs_fs_devices { u64 rw_devices; u64 missing_devices; u64 total_rw_bytes; + u64 num_can_discard; struct block_device *latest_bdev; /* all of the devices in the FS, protected by a mutex -- cgit v1.2.3 From 34f3e4f23ca3d259fe078f62a128d97ca83508ef Mon Sep 17 00:00:00 2001 From: liubo Date: Sat, 6 Aug 2011 08:35:23 +0000 Subject: Btrfs: fix an oops of log replay When btrfs recovers from a crash, it may hit the oops below: ------------[ cut here ]------------ kernel BUG at fs/btrfs/inode.c:4580! [...] RIP: 0010:[] [] btrfs_add_link+0x161/0x1c0 [btrfs] [...] Call Trace: [] ? btrfs_inode_ref_index+0x31/0x80 [btrfs] [] add_inode_ref+0x319/0x3f0 [btrfs] [] replay_one_buffer+0x2c7/0x390 [btrfs] [] walk_down_log_tree+0x32a/0x480 [btrfs] [] walk_log_tree+0xf5/0x240 [btrfs] [] btrfs_recover_log_trees+0x250/0x350 [btrfs] [] ? btrfs_recover_log_trees+0x350/0x350 [btrfs] [] open_ctree+0x1442/0x17d0 [btrfs] [...] This comes from that while replaying an inode ref item, we forget to check those old conflicting DIR_ITEM and DIR_INDEX items in fs/file tree, then we will come to conflict corners which lead to BUG_ON(). Signed-off-by: Liu Bo Tested-by: Andy Lutomirski Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index babee65f8eda..786639fca067 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -799,14 +799,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, struct extent_buffer *eb, int slot, struct btrfs_key *key) { - struct inode *dir; - int ret; struct btrfs_inode_ref *ref; + struct btrfs_dir_item *di; + struct inode *dir; struct inode *inode; - char *name; - int namelen; unsigned long ref_ptr; unsigned long ref_end; + char *name; + int namelen; + int ret; int search_done = 0; /* @@ -909,6 +910,25 @@ again: } btrfs_release_path(path); + /* look for a conflicting sequence number */ + di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), + btrfs_inode_ref_index(eb, ref), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + + /* look for a conflicing name */ + di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), + name, namelen, 0); + if (di && !IS_ERR(di)) { + ret = drop_one_dir_item(trans, root, path, dir, di); + BUG_ON(ret); + } + btrfs_release_path(path); + insert: /* insert our name */ ret = btrfs_add_link(trans, dir, inode, name, namelen, 0, -- cgit v1.2.3 From 38c01b9605923cfdff5413e0a12e58ee8d962257 Mon Sep 17 00:00:00 2001 From: liubo Date: Tue, 2 Aug 2011 02:39:03 +0000 Subject: Btrfs: fix a bug of balance on full multi-disk partitions When balancing, we'll first try to shrink devices for some space, but if it is working on a full multi-disk partition with raid protection, we may encounter a bug, that is, while shrinking, total_bytes may be less than bytes_used, and btrfs may allocate a dev extent that accesses out of device's bounds. Then we will not be able to write or read the data which stores at the end of the device, and get the followings: device fsid 0939f071-7ea3-46c8-95df-f176d773bfb6 devid 1 transid 10 /dev/sdb5 Btrfs detected SSD devices, enabling SSD mode btrfs: relocating block group 476315648 flags 9 btrfs: found 4 extents attempt to access beyond end of device sdb5: rw=145, want=546176, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546304, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546432, limit=546147 attempt to access beyond end of device sdb5: rw=145, want=546560, limit=546147 attempt to access beyond end of device Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a595f8775c37..46f9a208723d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -863,6 +863,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, max_hole_start = search_start; max_hole_size = 0; + hole_size = 0; if (search_start >= search_end) { ret = -ENOSPC; @@ -945,7 +946,14 @@ next: cond_resched(); } - hole_size = search_end- search_start; + /* + * At this point, search_start should be the end of + * allocated dev extents, and when shrinking the device, + * search_end may be smaller than search_start. + */ + if (search_end > search_start) + hole_size = search_end - search_start; + if (hole_size > max_hole_size) { max_hole_start = search_start; max_hole_size = hole_size; @@ -2447,9 +2455,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, total_avail = device->total_bytes - device->bytes_used; else total_avail = 0; - /* avail is off by max(alloc_start, 1MB), but that is the same - * for all devices, so it doesn't hurt the sorting later on - */ + + /* If there is no space on this device, skip it. */ + if (total_avail == 0) + continue; ret = find_free_dev_extent(trans, device, max_stripe_size * dev_stripes, -- cgit v1.2.3 From cdcb725c05fe0cb71777c66ddc2445fedbbb3c59 Mon Sep 17 00:00:00 2001 From: liubo Date: Wed, 3 Aug 2011 10:15:25 +0000 Subject: Btrfs: check if there is enough space for balancing smarter When checking if there is enough space for balancing a block group, since we do not take raid types into consideration, we do not account corrent amounts of space that we needed. This makes us do some extra work before we get ENOSPC. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 41 +++++++++++++++++++++++++++++++++++------ 1 file changed, 35 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 059dfa048cc0..a3e71b59f66e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6728,6 +6728,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_space_info *space_info; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; + u64 min_free; + int index; + int dev_nr = 0; + int dev_min = 1; int full = 0; int ret = 0; @@ -6737,8 +6741,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (!block_group) return -1; + min_free = btrfs_block_group_used(&block_group->item); + /* no bytes used, we're good */ - if (!btrfs_block_group_used(&block_group->item)) + if (!min_free) goto out; space_info = block_group->space_info; @@ -6754,10 +6760,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) * all of the extents from this block group. If we can, we're good */ if ((space_info->total_bytes != block_group->key.offset) && - (space_info->bytes_used + space_info->bytes_reserved + - space_info->bytes_pinned + space_info->bytes_readonly + - btrfs_block_group_used(&block_group->item) < - space_info->total_bytes)) { + (space_info->bytes_used + space_info->bytes_reserved + + space_info->bytes_pinned + space_info->bytes_readonly + + min_free < space_info->total_bytes)) { spin_unlock(&space_info->lock); goto out; } @@ -6774,9 +6779,29 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) if (full) goto out; + /* + * index: + * 0: raid10 + * 1: raid1 + * 2: dup + * 3: raid0 + * 4: single + */ + index = get_block_group_index(block_group); + if (index == 0) { + dev_min = 4; + min_free /= 2; + } else if (index == 1) { + dev_min = 2; + } else if (index == 2) { + min_free *= 2; + } else if (index == 3) { + dev_min = fs_devices->rw_devices; + min_free /= dev_min; + } + mutex_lock(&root->fs_info->chunk_mutex); list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { - u64 min_free = btrfs_block_group_used(&block_group->item); u64 dev_offset; /* @@ -6787,7 +6812,11 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) ret = find_free_dev_extent(NULL, device, min_free, &dev_offset, NULL); if (!ret) + dev_nr++; + + if (dev_nr >= dev_min) break; + ret = -1; } } -- cgit v1.2.3 From cb1b69f4508a1e8c1a7907379eafceb7ae0325ef Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Tue, 9 Aug 2011 07:11:13 +0000 Subject: Btrfs: forced readonly when btrfs_drop_snapshot() fails The filesystem turns readonly instead of returning the error to the caller when detected error in btrfs_drop_snapshot(). and, because the caller doesn't check the error, the function type is changed to 'void'. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 4 ++-- fs/btrfs/extent-tree.c | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a6263bdab818..884293642a6c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2367,8 +2367,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref); +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref); int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a3e71b59f66e..80d6148f60ac 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6277,8 +6277,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, * also make sure backrefs for the shared block and all lower level * blocks are properly updated. */ -int btrfs_drop_snapshot(struct btrfs_root *root, - struct btrfs_block_rsv *block_rsv, int update_ref) +void btrfs_drop_snapshot(struct btrfs_root *root, + struct btrfs_block_rsv *block_rsv, int update_ref) { struct btrfs_path *path; struct btrfs_trans_handle *trans; @@ -6291,13 +6291,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int level; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out; + } wc = kzalloc(sizeof(*wc), GFP_NOFS); if (!wc) { btrfs_free_path(path); - return -ENOMEM; + err = -ENOMEM; + goto out; } trans = btrfs_start_transaction(tree_root, 0); @@ -6326,7 +6329,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->lowest_level = 0; if (ret < 0) { err = ret; - goto out; + goto out_free; } WARN_ON(ret > 0); @@ -6433,11 +6436,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } -out: +out_free: btrfs_end_transaction_throttle(trans, tree_root); kfree(wc); btrfs_free_path(path); - return err; +out: + if (err) + btrfs_std_error(root->fs_info, err); + return; } /* -- cgit v1.2.3 From c97c2916e25c56e878e3e94efd449e2d688fcb31 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 3 Aug 2011 08:11:41 +0000 Subject: Btrfs: use plain page_address() in header fields setget functions We've stopped using highmem for extent buffers. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 884293642a6c..8b99c79ad1a7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1415,17 +1415,15 @@ void btrfs_set_##name(struct extent_buffer *eb, type *s, u##bits val); #define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ static inline u##bits btrfs_##name(struct extent_buffer *eb) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ u##bits res = le##bits##_to_cpu(p->member); \ - kunmap_atomic(p, KM_USER0); \ return res; \ } \ static inline void btrfs_set_##name(struct extent_buffer *eb, \ u##bits val) \ { \ - type *p = kmap_atomic(eb->first_page, KM_USER0); \ + type *p = page_address(eb->first_page); \ p->member = cpu_to_le##bits(val); \ - kunmap_atomic(p, KM_USER0); \ } #define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ -- cgit v1.2.3 From f4ac904c411b55e58bb240f332f93db2455f0010 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 5 Aug 2011 14:19:00 +0000 Subject: btrfs: memory leak in btrfs_add_inode_defrag() We don't use the defrag struct on this path. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/file.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 010aec8be824..0705d15542c6 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -150,6 +150,8 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, spin_lock(&root->fs_info->defrag_inodes_lock); if (!BTRFS_I(inode)->in_defrag) __btrfs_add_inode_defrag(inode, defrag); + else + kfree(defrag); spin_unlock(&root->fs_info->defrag_inodes_lock); return 0; } -- cgit v1.2.3 From bb3ac5a4dfc8eeb881206c77d9f925e320d9c41a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 5 Aug 2011 09:32:35 +0000 Subject: Btrfs: fix wrong free space information Btrfs subtracted the size of the allocated space twice when it allocated the space from the bitmap in the cluster, it broke the free space information and led to oops finally. And this patch also fixes the bug that ctl->free_space was subtracted without lock. Reported-by: Liu Bo Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6377713f639c..6a265b9f85f2 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1168,9 +1168,9 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) div64_u64(extent_bytes, (sizeof(struct btrfs_free_space))); } -static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, - struct btrfs_free_space *info, u64 offset, - u64 bytes) +static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, + u64 offset, u64 bytes) { unsigned long start, count; @@ -1181,6 +1181,13 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; +} + +static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *info, u64 offset, + u64 bytes) +{ + __bitmap_clear_bits(ctl, info, offset, bytes); ctl->free_space -= bytes; } @@ -1984,7 +1991,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, return 0; ret = search_start; - bitmap_clear_bits(ctl, entry, ret, bytes); + __bitmap_clear_bits(ctl, entry, ret, bytes); return ret; } @@ -2039,7 +2046,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, continue; } } else { - ret = entry->offset; entry->offset += bytes; -- cgit v1.2.3 From 0e588859618be54ec100373f1b86296271ce5307 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 5 Aug 2011 09:32:37 +0000 Subject: Btrfs: fix uninitialized sync_pending sync_pending is uninitialized before it be used, fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 46f9a208723d..f2a4cc79da61 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -142,7 +142,7 @@ static noinline int run_scheduled_bios(struct btrfs_device *device) unsigned long limit; unsigned long last_waited = 0; int force_reg = 0; - int sync_pending; + int sync_pending = 0; struct blk_plug plug; /* -- cgit v1.2.3 From f81c9cdc567cd3160ff9e64868d9a1a7ee226480 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 10 Aug 2011 18:04:04 +0000 Subject: Btrfs: truncate pages from clone ioctl target range We need to truncate page cache pages for the clone ioctl target range or else we'll confuse ourselves to no end. If the old data was cached, we used to still see it (until remount). If the page was partially updated we used to get a mix of old and new data. Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2bb08862a4f6..b3d249d6eba7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2244,6 +2244,10 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, btrfs_wait_ordered_range(src, off, len); } + /* truncate page cache pages from target inode range */ + truncate_inode_pages_range(&inode->i_data, off, + ALIGN(off + len, PAGE_CACHE_SIZE) - 1); + /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; -- cgit v1.2.3 From c331eb580a0a7906c0cdb8dbae3cfe99e3c0e555 Mon Sep 17 00:00:00 2001 From: Andrew Drake Date: Tue, 16 Aug 2011 11:07:39 -0700 Subject: Input: bcm5974 - Add support for newer MacBookPro8,2 New MacBook Pro devices reporting product name MacBookPro8,2 come with newer/higher resolution touchpads than others with the same product name with USB ID 05ac:0252. This patch adds support for these devices. Signed-off-by: Andrew Drake Reviewed-by: Wanlong Gao Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/bcm5974.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/input/mouse/bcm5974.c b/drivers/input/mouse/bcm5974.c index 48d9ec13d32d..da280189ef07 100644 --- a/drivers/input/mouse/bcm5974.c +++ b/drivers/input/mouse/bcm5974.c @@ -71,6 +71,10 @@ #define USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI 0x024c #define USB_DEVICE_ID_APPLE_WELLSPRING6_ISO 0x024d #define USB_DEVICE_ID_APPLE_WELLSPRING6_JIS 0x024e +/* Macbook8,2 (unibody) */ +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI 0x0252 +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO 0x0253 +#define USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS 0x0254 #define BCM5974_DEVICE(prod) { \ .match_flags = (USB_DEVICE_ID_MATCH_DEVICE | \ @@ -112,6 +116,10 @@ static const struct usb_device_id bcm5974_table[] = { BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ANSI), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_ISO), BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING6_JIS), + /* MacbookPro8,2 */ + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO), + BCM5974_DEVICE(USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS), /* Terminating entry */ {} }; @@ -314,6 +322,18 @@ static const struct bcm5974_config bcm5974_config_table[] = { { DIM_X, DIM_X / SN_COORD, -4620, 5140 }, { DIM_Y, DIM_Y / SN_COORD, -150, 6600 } }, + { + USB_DEVICE_ID_APPLE_WELLSPRING5A_ANSI, + USB_DEVICE_ID_APPLE_WELLSPRING5A_ISO, + USB_DEVICE_ID_APPLE_WELLSPRING5A_JIS, + HAS_INTEGRATED_BUTTON, + 0x84, sizeof(struct bt_data), + 0x81, TYPE2, FINGER_TYPE2, FINGER_TYPE2 + SIZEOF_ALL_FINGERS, + { DIM_PRESSURE, DIM_PRESSURE / SN_PRESSURE, 0, 300 }, + { DIM_WIDTH, DIM_WIDTH / SN_WIDTH, 0, 2048 }, + { DIM_X, DIM_X / SN_COORD, -4750, 5280 }, + { DIM_Y, DIM_Y / SN_COORD, -150, 6730 } + }, {} }; -- cgit v1.2.3 From 28ac293363368650963ee4c1e323c1ff502c121f Mon Sep 17 00:00:00 2001 From: Yufeng Shen Date: Tue, 16 Aug 2011 00:40:54 -0700 Subject: Input: atmel_mxt_ts - report pressure information from the driver Atmel mxt1386 touch controller has the touch pressure information so let's report it to the user space. [dtor@mail.ru: added ABS_RESSURE reporting for ST emulation.] Signed-off-by: Yufeng Shen Acked-by: Wanlong Gao Acked-by: Henrik Rydberg Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/atmel_mxt_ts.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index ae00604a6a81..f5d66859f232 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -244,6 +244,7 @@ struct mxt_finger { int x; int y; int area; + int pressure; }; /* Each client has this additional data */ @@ -536,6 +537,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id) finger[id].x); input_report_abs(input_dev, ABS_MT_POSITION_Y, finger[id].y); + input_report_abs(input_dev, ABS_MT_PRESSURE, + finger[id].pressure); } else { finger[id].status = 0; } @@ -546,6 +549,8 @@ static void mxt_input_report(struct mxt_data *data, int single_id) if (status != MXT_RELEASE) { input_report_abs(input_dev, ABS_X, finger[single_id].x); input_report_abs(input_dev, ABS_Y, finger[single_id].y); + input_report_abs(input_dev, + ABS_PRESSURE, finger[single_id].pressure); } input_sync(input_dev); @@ -560,6 +565,7 @@ static void mxt_input_touchevent(struct mxt_data *data, int x; int y; int area; + int pressure; /* Check the touch is present on the screen */ if (!(status & MXT_DETECT)) { @@ -584,6 +590,7 @@ static void mxt_input_touchevent(struct mxt_data *data, y = y >> 2; area = message->message[4]; + pressure = message->message[5]; dev_dbg(dev, "[%d] %s x: %d, y: %d, area: %d\n", id, status & MXT_MOVE ? "moved" : "pressed", @@ -594,6 +601,7 @@ static void mxt_input_touchevent(struct mxt_data *data, finger[id].x = x; finger[id].y = y; finger[id].area = area; + finger[id].pressure = pressure; mxt_input_report(data, id); } @@ -1116,6 +1124,8 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_Y, 0, data->max_y, 0, 0); + input_set_abs_params(input_dev, ABS_PRESSURE, + 0, 255, 0, 0); /* For multi touch */ input_mt_init_slots(input_dev, MXT_MAX_FINGER); @@ -1125,6 +1135,8 @@ static int __devinit mxt_probe(struct i2c_client *client, 0, data->max_x, 0, 0); input_set_abs_params(input_dev, ABS_MT_POSITION_Y, 0, data->max_y, 0, 0); + input_set_abs_params(input_dev, ABS_MT_PRESSURE, + 0, 255, 0, 0); input_set_drvdata(input_dev, data); i2c_set_clientdata(client, data); -- cgit v1.2.3 From 25b7679136fd85b1e5197e36a0ca126163e89590 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 17 Aug 2011 09:20:01 +0200 Subject: ASoC: Fix check for symmetric rate enforcement The ASoC core tries to not enforce symmetric rates when two streams open simultaneously. It does so by checking rtd->rate being zero. This works exactly once after booting because it is not set to zero again when the streams close. Fix this by setting rtd->rate when no active stream is left. [This leads to lots of warnings about not enforcing the symmetry in some situations as there's a race in the userspace API where we know we've got two applications but don't know what rates they want to set. -- broonie ] Signed-off-by: Sascha Hauer Signed-off-by: Mark Brown --- sound/soc/soc-pcm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c index b5759397afa3..2879c883eebc 100644 --- a/sound/soc/soc-pcm.c +++ b/sound/soc/soc-pcm.c @@ -290,6 +290,9 @@ static int soc_pcm_close(struct snd_pcm_substream *substream) codec_dai->active--; codec->active--; + if (!cpu_dai->active && !codec_dai->active) + rtd->rate = 0; + /* Muting the DAC suppresses artifacts caused during digital * shutdown, for example from stopping clocks. */ -- cgit v1.2.3 From 8c320c079cde0286d71368961231e426539868b4 Mon Sep 17 00:00:00 2001 From: Jonas Aberg Date: Wed, 17 Aug 2011 19:10:06 +0900 Subject: fat: fix build warning This fixes a compile warning (unititialized variable) in the fat filesystem code. Signed-off-by: Jonas Aberg Signed-off-by: Lee Jones Signed-off-by: Linus Walleij Signed-off-by: OGAWA Hirofumi --- fs/fat/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4ad64732cbce..5efbd5d7701a 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -1231,7 +1231,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, struct super_block *sb = dir->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); struct buffer_head *bh, *prev, *bhs[3]; /* 32*slots (672bytes) */ - struct msdos_dir_entry *de; + struct msdos_dir_entry *uninitialized_var(de); int err, free_slots, i, nr_bhs; loff_t pos, i_pos; -- cgit v1.2.3 From 186b53701ca5a843b07ca44a8d954dc6043c70f4 Mon Sep 17 00:00:00 2001 From: Mihai Moldovan Date: Wed, 17 Aug 2011 19:10:08 +0900 Subject: fat: fix utf8 iocharset warning message The fat_msg function already formats the given message and appends a newline to it - we don't need to do this in the passed message string as well, or will end up with a blank line printed in the kernel log ring buffer. Also change the loglevel from error to warning. Signed-off-by: Mihai Moldovan Signed-off-by: OGAWA Hirofumi --- fs/fat/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index cb8d8391ac0b..52bcf58104e2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1186,9 +1186,9 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat, out: /* UTF-8 doesn't provide FAT semantics */ if (!strcmp(opts->iocharset, "utf8")) { - fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" + fat_msg(sb, KERN_WARNING, "utf8 is not a recommended IO charset" " for FAT filesystems, filesystem will be " - "case sensitive!\n"); + "case sensitive!"); } /* If user doesn't specify allow_utime, it's initialized from dmask. */ -- cgit v1.2.3 From 710d4403a45c4040a9aa86971d50958f5ae6ed40 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Wed, 17 Aug 2011 19:10:09 +0900 Subject: fat: fat16 support maximum 4GB file/vol size as WinXP or 7. FAT16 support maximum 4GB vol/file size with 64KB cluster size. Win NT/XP/7 increased the maximum cluster size to 64KB, and file/vol size increased 4GB also. Although increasing, the file size of linux FAT is still limited at 2GB. I found that it is limited by sb->maxbytes(0x7fffffff) when partition is formatted by FAT16. sb->s_maxbytes in fill_super should be set to 0xffffffff like fat32. Signed-off-by: Namjae Jeon Signed-off-by: OGAWA Hirofumi --- fs/fat/inode.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 52bcf58104e2..017493b64317 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1365,6 +1365,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->free_clusters = -1; /* Don't know yet */ sbi->free_clus_valid = 0; sbi->prev_free = FAT_START_ENT; + sb->s_maxbytes = 0xffffffff; if (!sbi->fat_length && b->fat32_length) { struct fat_boot_fsinfo *fsinfo; @@ -1375,8 +1376,6 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, sbi->fat_length = le32_to_cpu(b->fat32_length); sbi->root_cluster = le32_to_cpu(b->root_cluster); - sb->s_maxbytes = 0xffffffff; - /* MC - if info_sector is 0, don't multiply by 0 */ sbi->fsinfo_sector = le16_to_cpu(b->info_sector); if (sbi->fsinfo_sector == 0) -- cgit v1.2.3 From ccbcdf7cf1b5f6c6db30d84095b9c6c53043af55 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Aug 2011 15:07:41 +0100 Subject: xen/x86: replace order-based range checking of M2P table by linear one The order-based approach is not only less efficient (requiring a shift and a compare, typical generated code looking like this mov eax, [machine_to_phys_order] mov ecx, eax shr ebx, cl test ebx, ebx jnz ... whereas a direct check requires just a compare, like in cmp ebx, [machine_to_phys_nr] jae ... ), but also slightly dangerous in the 32-on-64 case - the element address calculation can wrap if the next power of two boundary is sufficiently far away from the actual upper limit of the table, and hence can result in user space addresses being accessed (with it being unknown what may actually be mapped there). Additionally, the elimination of the mistaken use of fls() here (should have been __fls()) fixes a latent issue on x86-64 that would trigger if the code was run on a system with memory extending beyond the 44-bit boundary. CC: stable@kernel.org Signed-off-by: Jan Beulich [v1: Based on Jeremy's feedback] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/include/asm/xen/page.h | 4 ++-- arch/x86/xen/enlighten.c | 4 ++-- arch/x86/xen/mmu.c | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 64a619d47d34..7ff4669580cf 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -39,7 +39,7 @@ typedef struct xpaddr { ((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE)) extern unsigned long *machine_to_phys_mapping; -extern unsigned int machine_to_phys_order; +extern unsigned long machine_to_phys_nr; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); @@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; - if (unlikely((mfn >> machine_to_phys_order) != 0)) { + if (unlikely(mfn >= machine_to_phys_nr)) { pfn = ~0; goto try_override; } diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 974a528458a0..b960429d5b65 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type); unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; EXPORT_SYMBOL(machine_to_phys_mapping); -unsigned int machine_to_phys_order; -EXPORT_SYMBOL(machine_to_phys_order); +unsigned long machine_to_phys_nr; +EXPORT_SYMBOL(machine_to_phys_nr); struct start_info *xen_start_info; EXPORT_SYMBOL_GPL(xen_start_info); diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f987bde77c49..24abc1f50dc5 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) void __init xen_setup_machphys_mapping(void) { struct xen_machphys_mapping mapping; - unsigned long machine_to_phys_nr_ents; if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) { machine_to_phys_mapping = (unsigned long *)mapping.v_start; - machine_to_phys_nr_ents = mapping.max_mfn + 1; + machine_to_phys_nr = mapping.max_mfn + 1; } else { - machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES; + machine_to_phys_nr = MACH2PHYS_NR_ENTRIES; } - machine_to_phys_order = fls(machine_to_phys_nr_ents - 1); +#ifdef CONFIG_X86_32 + if ((machine_to_phys_mapping + machine_to_phys_nr) + < machine_to_phys_mapping) + machine_to_phys_nr = (unsigned long *)NULL + - machine_to_phys_mapping; +#endif } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 0ace64b85ea7b90e3bffe408b9d7c3364692bfa4 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Aug 2011 21:12:09 +0000 Subject: IBiser: Fix wrong mask when sizeof (dma_addr_t) > sizeof (unsigned long) The code that prepares the SG associated with SCSI command for FMR was buggy for systems with DMA addresses that don't fit in unsigned long, e.g under the 32-bit based XenServer dom0 sizeof(dma_addr_t) is 8. Fix that by casting to unsigned long long a masking constant used by the code. This resolves a crash in iser_sg_to_page_vec on this system. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 342cbc1bdaae..db6f3ce9f3bf 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -89,7 +89,7 @@ } while (0) #define SHIFT_4K 12 -#define SIZE_4K (1UL << SHIFT_4K) +#define SIZE_4K (1ULL << SHIFT_4K) #define MASK_4K (~(SIZE_4K-1)) /* support up to 512KB in one RDMA */ -- cgit v1.2.3 From 200ae1a08bec8f3fedfcfe94c892d9a024db4e46 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Aug 2011 21:14:09 +0000 Subject: IB/iser: Support iSCSI PDU padding RFC3270 mandates that iSCSI PDUs are padded to the closest integer number of four byte words. Fix the iser code to support that on both the TX/RX flows. Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 10 +++++++--- drivers/infiniband/ulp/iser/iser_initiator.c | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 8db008de5392..9c61b9c2c597 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -101,13 +101,17 @@ iscsi_iser_recv(struct iscsi_conn *conn, /* verify PDU length */ datalen = ntoh24(hdr->dlength); - if (datalen != rx_data_len) { - printk(KERN_ERR "iscsi_iser: datalen %d (hdr) != %d (IB) \n", - datalen, rx_data_len); + if (datalen > rx_data_len || (datalen + 4) < rx_data_len) { + iser_err("wrong datalen %d (hdr), %d (IB)\n", + datalen, rx_data_len); rc = ISCSI_ERR_DATALEN; goto error; } + if (datalen != rx_data_len) + iser_dbg("aligned datalen (%d) hdr, %d (IB)\n", + datalen, rx_data_len); + /* read AHS */ ahslen = hdr->hlength * 4; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 5745b7fe158c..f299de6b419b 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -412,7 +412,7 @@ int iser_send_control(struct iscsi_conn *conn, memcpy(iser_conn->ib_conn->login_buf, task->data, task->data_count); tx_dsg->addr = iser_conn->ib_conn->login_dma; - tx_dsg->length = data_seg_len; + tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; } -- cgit v1.2.3 From f991879473828f320a714e9494fb37a26ccd6b66 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Aug 2011 13:45:09 +0100 Subject: mm: make HASHED_PAGE_VIRTUAL page_address' struct page argument const. Followup to 33dd4e0ec911 "mm: make some struct page's const" which missed the HASHED_PAGE_VIRTUAL case. Signed-off-by: Ian Campbell Cc: Andrea Arcangeli Cc: Rik van Riel Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hash.h | 2 +- include/linux/mm.h | 2 +- mm/highmem.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/hash.h b/include/linux/hash.h index 06d25c189cc5..b80506bdd733 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -63,7 +63,7 @@ static inline u32 hash_32(u32 val, unsigned int bits) return hash >> (32 - bits); } -static inline unsigned long hash_ptr(void *ptr, unsigned int bits) +static inline unsigned long hash_ptr(const void *ptr, unsigned int bits) { return hash_long((unsigned long)ptr, bits); } diff --git a/include/linux/mm.h b/include/linux/mm.h index fd599f4bb846..c06454d60a3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -737,7 +737,7 @@ static __always_inline void *lowmem_page_address(const struct page *page) #endif #if defined(HASHED_PAGE_VIRTUAL) -void *page_address(struct page *page); +void *page_address(const struct page *page); void set_page_address(struct page *page, void *virtual); void page_address_init(void); #endif diff --git a/mm/highmem.c b/mm/highmem.c index 693394daa2ed..5ef672c07f75 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -326,7 +326,7 @@ static struct page_address_slot { spinlock_t lock; /* Protect this bucket's list */ } ____cacheline_aligned_in_smp page_address_htable[1< Date: Wed, 17 Aug 2011 17:40:33 +0100 Subject: mm: fix __page_to_pfn for a const struct page argument This allows the cast in lowmem_page_address (introduced as a warning fixup to 33dd4e0ec911 "mm: make some struct page's const") to be removed. Propagate const'ness to page_to_section() as well since it is required by __page_to_pfn. Signed-off-by: Ian Campbell Acked-by: Rik van Riel Cc: Andrea Arcangeli Cc: Michel Lespinasse Cc: Mel Gorman Cc: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/memory_model.h | 4 ++-- include/linux/mm.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h index fb2d63f13f4c..aea9e45efce6 100644 --- a/include/asm-generic/memory_model.h +++ b/include/asm-generic/memory_model.h @@ -39,7 +39,7 @@ }) #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \ (unsigned long)(__pg - __pgdat->node_mem_map) + \ __pgdat->node_start_pfn; \ @@ -57,7 +57,7 @@ * section[i].section_mem_map == mem_map's address - start_pfn; */ #define __page_to_pfn(pg) \ -({ struct page *__pg = (pg); \ +({ const struct page *__pg = (pg); \ int __sec = page_to_section(__pg); \ (unsigned long)(__pg - __section_mem_map_addr(__nr_to_section(__sec))); \ }) diff --git a/include/linux/mm.h b/include/linux/mm.h index c06454d60a3d..7438071b44aa 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -685,7 +685,7 @@ static inline void set_page_section(struct page *page, unsigned long section) page->flags |= (section & SECTIONS_MASK) << SECTIONS_PGSHIFT; } -static inline unsigned long page_to_section(struct page *page) +static inline unsigned long page_to_section(const struct page *page) { return (page->flags >> SECTIONS_PGSHIFT) & SECTIONS_MASK; } @@ -720,7 +720,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone, static __always_inline void *lowmem_page_address(const struct page *page) { - return __va(PFN_PHYS(page_to_pfn((struct page *)page))); + return __va(PFN_PHYS(page_to_pfn(page))); } #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL) -- cgit v1.2.3 From 338d0f0a6fbc82407864606f5b64b75aeb3c70f2 Mon Sep 17 00:00:00 2001 From: Timo Warns Date: Wed, 17 Aug 2011 17:59:56 +0200 Subject: befs: Validate length of long symbolic links. Signed-off-by: Timo Warns Signed-off-by: Linus Torvalds --- fs/befs/linuxvfs.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 54b8c28bebc8..720d885e8dca 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -474,17 +474,22 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) befs_data_stream *data = &befs_ino->i_data.ds; befs_off_t len = data->size; - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); + if (len == 0) { + befs_error(sb, "Long symlink with illegal length"); link = ERR_PTR(-EIO); } else { - link[len - 1] = '\0'; + befs_debug(sb, "Follow long symlink"); + + link = kmalloc(len, GFP_NOFS); + if (!link) { + link = ERR_PTR(-ENOMEM); + } else if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + link = ERR_PTR(-EIO); + } else { + link[len - 1] = '\0'; + } } } else { link = befs_ino->i_data.symlink; -- cgit v1.2.3 From 77e57297b4ff3f602ba5105398d342a4b4a54774 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 23 May 2011 14:39:17 +0200 Subject: perf list: Fix exit value This patch fixes an issue with the exit value of perf list: $ perf list; echo $? 129 perf list returns an error exit code even though there is no error. There was a stray exit(129) in print_events(). This patch removes this exit(). $ perf list; echo $? 0 $ perf list hw sw cpu-cycles OR cycles [Hardware event] stalled-cycles-frontend OR idle-cycles-frontend [Hardware event] stalled-cycles-backend OR idle-cycles-backend [Hardware event] instructions [Hardware event] cache-references [Hardware event] cache-misses [Hardware event] branch-instructions OR branches [Hardware event] branch-misses [Hardware event] bus-cycles [Hardware event] cpu-clock [Software event] task-clock [Software event] page-faults OR faults [Software event] minor-faults [Software event] major-faults [Software event] context-switches OR cs [Software event] cpu-migrations OR migrations [Software event] alignment-faults [Software event] emulation-faults [Software event] $ echo $? 0 Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110523123917.GA31060@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4ea7e19f5251..d93f3cea93c7 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1097,6 +1097,4 @@ void print_events(const char *event_glob) printf("\n"); print_tracepoint_events(NULL, NULL); - - exit(129); } -- cgit v1.2.3 From cc2d86b04d9ac28a6be6cb05da6ea8f014fd5aa0 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 7 Jun 2011 18:19:36 +0200 Subject: perf evlist: Fix missing event name init for default event When no event is given to perf record, perf top, a default event is initialized (cycles). However, perf_evlist__add_default() was not setting the symbolic name for the event. Perf top worked simply because it was reconstructing the name from the event code. But it should not have to do this. This patch initializes the evsel->name field properly. This second version improves the code flow on the non error path. Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110607161936.GA8163@quad Signed-off-by: Stephane Eranian [committer note: Use perf_evsel__delete() instead of plain free()] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index e03e7bc8205e..c12bd476c6f7 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -85,10 +85,19 @@ int perf_evlist__add_default(struct perf_evlist *evlist) struct perf_evsel *evsel = perf_evsel__new(&attr, 0); if (evsel == NULL) - return -ENOMEM; + goto error; + + /* use strdup() because free(evsel) assumes name is allocated */ + evsel->name = strdup("cycles"); + if (!evsel->name) + goto error_free; perf_evlist__add(evlist, evsel); return 0; +error_free: + perf_evsel__delete(evsel); +error: + return -ENOMEM; } void perf_evlist__disable(struct perf_evlist *evlist) -- cgit v1.2.3 From 777d1d71db622a5e1ff703495741c3d257b532e5 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Sat, 23 Jul 2011 04:10:43 +0200 Subject: perf tools: Fix error handling of unknown events There was a problem with the parse_events() code not printing the correct event name when an event was unknown and starting with an 'r'. The source of the problem was the way raw notation was parsed. Without the patch: $ perf stat -e retired_foo invalid event modifier: 'tired_foo' With the patch: $ perf stat -e retired_foo invalid or unsupported event: 'retired_foo' This also covers the case where the name of the event was not printed at all when perf was linked with libpfm4. Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110723021043.GA20178@quad Signed-off-by: Stephane Eranian Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d93f3cea93c7..928918b796b2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -697,7 +697,11 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr) return EVT_FAILED; n = hex2u64(str + 1, &config); if (n > 0) { - *strp = str + n + 1; + const char *end = str + n + 1; + if (*end != '\0' && *end != ',' && *end != ':') + return EVT_FAILED; + + *strp = end; attr->type = PERF_TYPE_RAW; attr->config = config; return EVT_HANDLED; -- cgit v1.2.3 From 195bcbf5078d74c8e00d68f04eb8695196fb31e8 Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Thu, 18 Aug 2011 07:37:21 -0400 Subject: perf tools: Fix build against newer glibc Upstream glibc commit 295e904 added a definition for __attribute_const__ to cdefs.h. This causes the following error when building perf: util/include/linux/compiler.h:8:0: error: "__attribute_const__" redefined [-Werror] /usr/include/sys/cdefs.h:226:0: note: this is the location of the previous definition Wrap __attribute_const__ in #ifndef as we do for __always_inline. Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20110818113720.GL2227@zod.bos.redhat.com Signed-off-by: Josh Boyer Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/include/linux/compiler.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/include/linux/compiler.h b/tools/perf/util/include/linux/compiler.h index 791f9dd27ebf..547628e97f3d 100644 --- a/tools/perf/util/include/linux/compiler.h +++ b/tools/perf/util/include/linux/compiler.h @@ -5,7 +5,9 @@ #define __always_inline inline #endif #define __user +#ifndef __attribute_const__ #define __attribute_const__ +#endif #define __used __attribute__((__unused__)) -- cgit v1.2.3 From d53e8365eaacfdb29253b39d186109f5b4fcc08d Mon Sep 17 00:00:00 2001 From: Geunsik Lim Date: Thu, 18 Aug 2011 16:44:57 +0900 Subject: MAINTAINERS: Fix list of perf events source files Recent changes made kernel/perf_event.c be split and moved to kernel/events/. Cc: Ingo Molnar Cc: Jiri Kosina Cc: Joe Perches Cc: Li Zefan Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1313653497-27263-1-git-send-email-leemgs1@gmail.com Signed-off-by: Geunsik Lim Signed-off-by: Arnaldo Carvalho de Melo --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 1e55e1eeb811..7110675702ea 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4971,7 +4971,7 @@ M: Paul Mackerras M: Ingo Molnar M: Arnaldo Carvalho de Melo S: Supported -F: kernel/perf_event*.c +F: kernel/events/* F: include/linux/perf_event.h F: arch/*/kernel/perf_event*.c F: arch/*/kernel/*/perf_event*.c -- cgit v1.2.3 From 43bece79796c2a39ec98998fd3f1071f04f3d8c3 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Wed, 17 Aug 2011 18:42:07 +0800 Subject: perf tools: Add group event scheduling option to perf record/stat Group event scheduling command line option is missing in perf record/stat. Add it to perf record/stat, which is same as in perf top. Reported-by: Andi Kleen Cc: Andi Kleen Cc: Ingo Molnar Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1313577727.2754.5.camel@hp6530s Signed-off-by: Lin Ming Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 +++- tools/perf/builtin-stat.c | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f6426b496f4a..6b0519f885e4 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -45,7 +45,7 @@ static int freq = 1000; static int output; static int pipe_output = 0; static const char *output_name = NULL; -static int group = 0; +static bool group = false; static int realtime_prio = 0; static bool nodelay = false; static bool raw_samples = false; @@ -753,6 +753,8 @@ const struct option record_options[] = { "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &user_freq, "profile at this frequency"), OPT_UINTEGER('m', "mmap-pages", &mmap_pages, "number of mmap data pages"), + OPT_BOOLEAN(0, "group", &group, + "put the counters into a counter group"), OPT_BOOLEAN('g', "call-graph", &call_graph, "do call-graph (stack chain/backtrace) recording"), OPT_INCR('v', "verbose", &verbose, diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1ad04ce29c34..5deb17d9e795 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -193,6 +193,7 @@ static int big_num_opt = -1; static const char *cpu_list; static const char *csv_sep = NULL; static bool csv_output = false; +static bool group = false; static volatile int done = 0; @@ -280,14 +281,14 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->inherit = !no_inherit; if (system_wide) - return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, false); + return perf_evsel__open_per_cpu(evsel, evsel_list->cpus, group); if (target_pid == -1 && target_tid == -1) { attr->disabled = 1; attr->enable_on_exec = 1; } - return perf_evsel__open_per_thread(evsel, evsel_list->threads, false); + return perf_evsel__open_per_thread(evsel, evsel_list->threads, group); } /* @@ -1043,6 +1044,8 @@ static const struct option options[] = { "stat events on existing thread id"), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), + OPT_BOOLEAN('g', "group", &group, + "put the counters into a counter group"), OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), OPT_INCR('v', "verbose", &verbose, -- cgit v1.2.3 From 3fe45aeaf2033c9eaa5028ed5ba68b466008876f Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 18 Aug 2011 15:13:17 +0200 Subject: ALSA: hda - Add "PCM" volume to vmaster slave list The new parser may use "PCM" volume, but it was missing the vmaster slave list, thus "Master" volume didn't control it. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=41342 Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 9a1aa09f47fe..fcb11af9ad24 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1784,6 +1784,7 @@ static const char * const alc_slave_vols[] = { "Speaker Playback Volume", "Mono Playback Volume", "Line-Out Playback Volume", + "PCM Playback Volume", NULL, }; @@ -1798,6 +1799,7 @@ static const char * const alc_slave_sws[] = { "Mono Playback Switch", "IEC958 Playback Switch", "Line-Out Playback Switch", + "PCM Playback Switch", NULL, }; -- cgit v1.2.3 From cb6db4e57632ba8589cc2f9fe1d0aa9116b87ab8 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 15 Aug 2011 17:27:21 +0000 Subject: btrfs: btrfs_permission's RO check shouldn't apply to device nodes This patch tightens the read-only access checks in btrfs_permission to match the constraints in inode_permission. Currently, even though the device node itself will be unmodified, read-write access to device nodes is denied to when the device node resides on a read-only subvolume or a is a file that has been marked read-only by the btrfs conversion utility. With this patch applied, the check only affects regular files, directories, and symlinks. It also restructures the code a bit so that we don't duplicate the MAY_WRITE check for both tests. Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 15fceefbca0a..0ccc7438ad34 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7354,11 +7354,15 @@ static int btrfs_set_page_dirty(struct page *page) static int btrfs_permission(struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; + umode_t mode = inode->i_mode; - if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) - return -EROFS; - if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) - return -EACCES; + if (mask & MAY_WRITE && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + if (btrfs_root_readonly(root)) + return -EROFS; + if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) + return -EACCES; + } return generic_permission(inode, mask); } -- cgit v1.2.3 From 9a4327ca1f45f82edad7dc0a4e52ce9316e0950c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 18 Aug 2011 10:16:05 -0400 Subject: btrfs: unlock on error in btrfs_file_llseek() There were some unlocks on error missing in a recent patch to btrfs_file_llseek(). Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/file.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 658d66959abe..f7d9df7f3fdd 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1804,10 +1804,14 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) } } - if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) - return -EINVAL; - if (offset > inode->i_sb->s_maxbytes) - return -EINVAL; + if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) { + ret = -EINVAL; + goto out; + } + if (offset > inode->i_sb->s_maxbytes) { + ret = -EINVAL; + goto out; + } /* Special lock needed here? */ if (offset != file->f_pos) { -- cgit v1.2.3 From f1e490a7ebe41e06324abbbcd86005b0af02a375 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 18 Aug 2011 10:36:39 -0400 Subject: Btrfs: set i_size properly when fallocating and we already xfstests exposed a problem with preallocate when it fallocates a range that already has an extent. We don't set the new i_size properly because we see that we already have an extent. This isn't right and we should update i_size if the space already exists. With this patch we now pass xfstests 075. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0705d15542c6..15e5a1cd8764 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1631,11 +1631,15 @@ static long btrfs_fallocate(struct file *file, int mode, cur_offset = alloc_start; while (1) { + u64 actual_end; + em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); BUG_ON(IS_ERR_OR_NULL(em)); last_byte = min(extent_map_end(em), alloc_end); + actual_end = min_t(u64, extent_map_end(em), offset + len); last_byte = (last_byte + mask) & ~mask; + if (em->block_start == EXTENT_MAP_HOLE || (cur_offset >= inode->i_size && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { @@ -1648,6 +1652,16 @@ static long btrfs_fallocate(struct file *file, int mode, free_extent_map(em); break; } + } else if (actual_end > inode->i_size && + !(mode & FALLOC_FL_KEEP_SIZE)) { + /* + * We didn't need to allocate any more space, but we + * still extended the size of the file so we need to + * update i_size. + */ + inode->i_ctime = CURRENT_TIME; + i_size_write(inode, actual_end); + btrfs_ordered_update_i_size(inode, actual_end, NULL); } free_extent_map(em); -- cgit v1.2.3 From e574044acbad7421879270a80acd337459c94cc8 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Thu, 18 Aug 2011 15:02:47 +0300 Subject: ASoC: omap: Fix build errors in ams-delta Fix "error: too few arguments to function 'ams_delta_set_bias_level'" build errors in ams-delta.c that were introduced after commit d4c6005 ("ASoC: Add context parameter to card DAPM callbacks") by adding dapm context to ams_delta_set_bias_level calls. Signed-off-by: Jarkko Nikula Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/omap/ams-delta.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/soc/omap/ams-delta.c b/sound/soc/omap/ams-delta.c index 30fe0d0efe1c..0aa475f92efa 100644 --- a/sound/soc/omap/ams-delta.c +++ b/sound/soc/omap/ams-delta.c @@ -514,7 +514,7 @@ static int ams_delta_cx20442_init(struct snd_soc_pcm_runtime *rtd) } /* Set codec bias level */ - ams_delta_set_bias_level(card, SND_SOC_BIAS_STANDBY); + ams_delta_set_bias_level(card, dapm, SND_SOC_BIAS_STANDBY); /* Add hook switch - can be used to control the codec from userspace * even if line discipline fails */ @@ -649,7 +649,9 @@ static void __exit ams_delta_module_exit(void) ams_delta_hook_switch_gpios); /* Keep modem power on */ - ams_delta_set_bias_level(&ams_delta_audio_card, SND_SOC_BIAS_STANDBY); + ams_delta_set_bias_level(&ams_delta_audio_card, + &ams_delta_audio_card.rtd[0].codec->dapm, + SND_SOC_BIAS_STANDBY); platform_device_unregister(cx20442_platform_device); platform_device_unregister(ams_delta_audio_platform_device); -- cgit v1.2.3 From 13589c437daf4c8e429b3236c0b923de1c9420d8 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 18 Aug 2011 04:41:55 +0000 Subject: [CIFS] possible memory corruption on mount CIFS cleanup_volume_info_contents() looks like having a memory corruption problem. When UNCip is set to "&vol->UNC[2]" in cifs_parse_mount_options(), it should not be kfree()-ed in cleanup_volume_info_contents(). Introduced in commit b946845a9dc523c759cae2b6a0f6827486c3221a Signed-off-by: J.R. Okajima Reviewed-by: Jeff Layton CC: Stable Signed-off-by: Steve French --- fs/cifs/connect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 80c2e3add3a2..633c246b6775 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -2878,7 +2878,8 @@ cleanup_volume_info_contents(struct smb_vol *volume_info) kfree(volume_info->username); kzfree(volume_info->password); kfree(volume_info->UNC); - kfree(volume_info->UNCip); + if (volume_info->UNCip != volume_info->UNC + 2) + kfree(volume_info->UNCip); kfree(volume_info->domainname); kfree(volume_info->iocharset); kfree(volume_info->prepath); -- cgit v1.2.3 From 04c05b4a68c0ab0d6bb41c710a646e56f62a70a3 Mon Sep 17 00:00:00 2001 From: Steve French Date: Thu, 18 Aug 2011 04:44:35 +0000 Subject: update cifs version to 1.75 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index cb71dc1f94d1..95da8027983d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.74" +#define CIFS_VERSION "1.75" #endif /* _CIFSFS_H */ -- cgit v1.2.3 From 8cf2d2399ab60842f55598bc1b00fd15503b9950 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 18 Aug 2011 09:17:00 +0200 Subject: i7core_edac: fixed typo in error count calculation Based on a patch from the PaX Team, found during a clang analysis pass. Signed-off-by: Mathias Krause Acked-by: Mauro Carvalho Chehab Cc: PaX Team Cc: stable@kernel.org [v2.6.35+] Signed-off-by: Linus Torvalds --- drivers/edac/i7core_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 04f1e7ce02b1..f6cf448d69b4 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1670,7 +1670,7 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci, char *type, *optype, *err, *msg; unsigned long error = m->status & 0x1ff0000l; u32 optypenum = (m->status >> 4) & 0x07; - u32 core_err_cnt = (m->status >> 38) && 0x7fff; + u32 core_err_cnt = (m->status >> 38) & 0x7fff; u32 dimm = (m->misc >> 16) & 0x3; u32 channel = (m->misc >> 18) & 0x3; u32 syndrome = m->misc >> 32; -- cgit v1.2.3 From d522a0d17963e9c2e556db2cbd60c96d40505b6c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 18 Aug 2011 12:19:27 -0700 Subject: irqdesc: fix new kernel-doc warning Fix kernel-doc warning in irqdesc.c: Warning(kernel/irq/irqdesc.c:353): No description found for parameter 'owner' Signed-off-by: Randy Dunlap Signed-off-by: Linus Torvalds --- kernel/irq/irqdesc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cb65d0360e31..039b889ea053 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -344,6 +344,7 @@ EXPORT_SYMBOL_GPL(irq_free_descs); * @from: Start the search from this irq number * @cnt: Number of consecutive irqs to allocate. * @node: Preferred node on which the irq descriptor should be allocated + * @owner: Owning module (can be NULL) * * Returns the first irq number or error code */ -- cgit v1.2.3 From ebd1699ec5f1a6f1f2df6b48fa54bc6ff790143c Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 18 Aug 2011 23:52:36 -0400 Subject: [libata] sata_sil: fix used-uninit warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Init 'serror' to silence the following warning: drivers/ata/sata_sil.c: In function ‘sil_interrupt’: drivers/ata/sata_sil.c:453:14: warning: ‘serror’ may be used uninitialized in this function [-Wuninitialized] This is not a 'can never happen' but is nonetheless extremely unlikely. The easiest and cleanest warning fix is simply to init the var, rather than worry about marking the var uninit-ok. Signed-off-by: Jeff Garzik --- drivers/ata/sata_sil.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c index 98c1d780f552..9dfb40b8c2c9 100644 --- a/drivers/ata/sata_sil.c +++ b/drivers/ata/sata_sil.c @@ -438,7 +438,7 @@ static void sil_host_intr(struct ata_port *ap, u32 bmdma2) u8 status; if (unlikely(bmdma2 & SIL_DMA_SATA_IRQ)) { - u32 serror; + u32 serror = 0xffffffff; /* SIEN doesn't mask SATA IRQs on some 3112s. Those * controllers continue to assert IRQ as long as -- cgit v1.2.3 From 6d0e194d2eefcaab6dbdca1f639748660144acb5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 4 Aug 2011 11:15:07 +0200 Subject: pata_via: disable ATAPI DMA on AVERATEC 3200 On AVERATEC 3200, pata_via causes memory corruption with ATAPI DMA, which often leads to random kernel oops. The cause of the problem is not well understood yet and only small subset of machines using the controller seem affected. Blacklist ATAPI DMA on the machine. Signed-off-by: Tejun Heo Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=11426 Reported-and-tested-by: Jim Bray Cc: Alan Cox Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/pata_via.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c index 65e4be6be220..8e9f5048a10a 100644 --- a/drivers/ata/pata_via.c +++ b/drivers/ata/pata_via.c @@ -124,6 +124,17 @@ static const struct via_isa_bridge { { NULL } }; +static const struct dmi_system_id no_atapi_dma_dmi_table[] = { + { + .ident = "AVERATEC 3200", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AVERATEC"), + DMI_MATCH(DMI_BOARD_NAME, "3200"), + }, + }, + { } +}; + struct via_port { u8 cached_device; }; @@ -355,6 +366,13 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask) mask &= ~ ATA_MASK_UDMA; } } + + if (dev->class == ATA_DEV_ATAPI && + dmi_check_system(no_atapi_dma_dmi_table)) { + ata_dev_warn(dev, "controller locks up on ATAPI DMA, forcing PIO\n"); + mask &= ATA_MASK_PIO; + } + return mask; } -- cgit v1.2.3 From e39c75cf3e045c2fb3988770b207dfd09c30d4ac Mon Sep 17 00:00:00 2001 From: "Arnaud Patard (Rtp)" Date: Tue, 26 Jul 2011 16:58:19 +0200 Subject: ata: Add iMX pata support Add basic support for pata on iMX. It has been tested only on imx51. SDMA support will probably be added later so this version supports only PIO. v2: - enable only when needed IORDY - use dev_get_drvdata v3: - add missing clk_put() calls - use platform_get_irq() - fix resume code to avoid disabling IORDY on resume v4: - Remove EXPERIMENTAL and switch to depends on ARCH_MXC - Use devm_kzalloc() - make clock a must-have - Use only 1 ioremap Signed-off-by: Arnaud Patard Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 9 ++ drivers/ata/Makefile | 1 + drivers/ata/pata_imx.c | 253 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 263 insertions(+) create mode 100644 drivers/ata/pata_imx.c diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index ca3e6be44a04..5987e0ba8c2d 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -468,6 +468,15 @@ config PATA_ICSIDE interface card. This is not required for ICS partition support. If you are unsure, say N to this. +config PATA_IMX + tristate "PATA support for Freescale iMX" + depends on ARCH_MXC + help + This option enables support for the PATA host available on Freescale + iMX SoCs. + + If unsure, say N. + config PATA_IT8213 tristate "IT8213 PATA support (Experimental)" depends on PCI && EXPERIMENTAL diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 8ac64e1aa051..9550d691fd19 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -48,6 +48,7 @@ obj-$(CONFIG_PATA_HPT37X) += pata_hpt37x.o obj-$(CONFIG_PATA_HPT3X2N) += pata_hpt3x2n.o obj-$(CONFIG_PATA_HPT3X3) += pata_hpt3x3.o obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o +obj-$(CONFIG_PATA_IMX) += pata_imx.o obj-$(CONFIG_PATA_IT8213) += pata_it8213.o obj-$(CONFIG_PATA_IT821X) += pata_it821x.o obj-$(CONFIG_PATA_JMICRON) += pata_jmicron.o diff --git a/drivers/ata/pata_imx.c b/drivers/ata/pata_imx.c new file mode 100644 index 000000000000..ca9d9caedfa3 --- /dev/null +++ b/drivers/ata/pata_imx.c @@ -0,0 +1,253 @@ +/* + * Freescale iMX PATA driver + * + * Copyright (C) 2011 Arnaud Patard + * + * Based on pata_platform - Copyright (C) 2006 - 2007 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * TODO: + * - dmaengine support + * - check if timing stuff needed + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_NAME "pata_imx" + +#define PATA_IMX_ATA_CONTROL 0x24 +#define PATA_IMX_ATA_CTRL_FIFO_RST_B (1<<7) +#define PATA_IMX_ATA_CTRL_ATA_RST_B (1<<6) +#define PATA_IMX_ATA_CTRL_IORDY_EN (1<<0) +#define PATA_IMX_ATA_INT_EN 0x2C +#define PATA_IMX_ATA_INTR_ATA_INTRQ2 (1<<3) +#define PATA_IMX_DRIVE_DATA 0xA0 +#define PATA_IMX_DRIVE_CONTROL 0xD8 + +struct pata_imx_priv { + struct clk *clk; + /* timings/interrupt/control regs */ + u8 *host_regs; + u32 ata_ctl; +}; + +static int pata_imx_set_mode(struct ata_link *link, struct ata_device **unused) +{ + struct ata_device *dev; + struct ata_port *ap = link->ap; + struct pata_imx_priv *priv = ap->host->private_data; + u32 val; + + ata_for_each_dev(dev, link, ENABLED) { + dev->pio_mode = dev->xfer_mode = XFER_PIO_0; + dev->xfer_shift = ATA_SHIFT_PIO; + dev->flags |= ATA_DFLAG_PIO; + + val = __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); + if (ata_pio_need_iordy(dev)) + val |= PATA_IMX_ATA_CTRL_IORDY_EN; + else + val &= ~PATA_IMX_ATA_CTRL_IORDY_EN; + __raw_writel(val, priv->host_regs + PATA_IMX_ATA_CONTROL); + + ata_dev_printk(dev, KERN_INFO, "configured for PIO\n"); + } + return 0; +} + +static struct scsi_host_template pata_imx_sht = { + ATA_PIO_SHT(DRV_NAME), +}; + +static struct ata_port_operations pata_imx_port_ops = { + .inherits = &ata_sff_port_ops, + .sff_data_xfer = ata_sff_data_xfer_noirq, + .cable_detect = ata_cable_unknown, + .set_mode = pata_imx_set_mode, +}; + +static void pata_imx_setup_port(struct ata_ioports *ioaddr) +{ + /* Fixup the port shift for platforms that need it */ + ioaddr->data_addr = ioaddr->cmd_addr + (ATA_REG_DATA << 2); + ioaddr->error_addr = ioaddr->cmd_addr + (ATA_REG_ERR << 2); + ioaddr->feature_addr = ioaddr->cmd_addr + (ATA_REG_FEATURE << 2); + ioaddr->nsect_addr = ioaddr->cmd_addr + (ATA_REG_NSECT << 2); + ioaddr->lbal_addr = ioaddr->cmd_addr + (ATA_REG_LBAL << 2); + ioaddr->lbam_addr = ioaddr->cmd_addr + (ATA_REG_LBAM << 2); + ioaddr->lbah_addr = ioaddr->cmd_addr + (ATA_REG_LBAH << 2); + ioaddr->device_addr = ioaddr->cmd_addr + (ATA_REG_DEVICE << 2); + ioaddr->status_addr = ioaddr->cmd_addr + (ATA_REG_STATUS << 2); + ioaddr->command_addr = ioaddr->cmd_addr + (ATA_REG_CMD << 2); +} + +static int __devinit pata_imx_probe(struct platform_device *pdev) +{ + struct ata_host *host; + struct ata_port *ap; + struct pata_imx_priv *priv; + int irq = 0; + struct resource *io_res; + + io_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (io_res == NULL) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq <= 0) + return -EINVAL; + + priv = devm_kzalloc(&pdev->dev, + sizeof(struct pata_imx_priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(priv->clk)) { + dev_err(&pdev->dev, "Failed to get clock\n"); + return PTR_ERR(priv->clk); + } + + clk_enable(priv->clk); + + host = ata_host_alloc(&pdev->dev, 1); + if (!host) + goto free_priv; + + host->private_data = priv; + ap = host->ports[0]; + + ap->ops = &pata_imx_port_ops; + ap->pio_mask = ATA_PIO0; + ap->flags |= ATA_FLAG_SLAVE_POSS; + + priv->host_regs = devm_ioremap(&pdev->dev, io_res->start, + resource_size(io_res)); + if (!priv->host_regs) { + dev_err(&pdev->dev, "failed to map IO/CTL base\n"); + goto free_priv; + } + + ap->ioaddr.cmd_addr = priv->host_regs + PATA_IMX_DRIVE_DATA; + ap->ioaddr.ctl_addr = priv->host_regs + PATA_IMX_DRIVE_CONTROL; + + ap->ioaddr.altstatus_addr = ap->ioaddr.ctl_addr; + + pata_imx_setup_port(&ap->ioaddr); + + ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx", + (unsigned long long)io_res->start + PATA_IMX_DRIVE_DATA, + (unsigned long long)io_res->start + PATA_IMX_DRIVE_CONTROL); + + /* deassert resets */ + __raw_writel(PATA_IMX_ATA_CTRL_FIFO_RST_B | + PATA_IMX_ATA_CTRL_ATA_RST_B, + priv->host_regs + PATA_IMX_ATA_CONTROL); + /* enable interrupts */ + __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, + priv->host_regs + PATA_IMX_ATA_INT_EN); + + /* activate */ + return ata_host_activate(host, irq, ata_sff_interrupt, 0, + &pata_imx_sht); + +free_priv: + clk_disable(priv->clk); + clk_put(priv->clk); + return -ENOMEM; +} + +static int __devexit pata_imx_remove(struct platform_device *pdev) +{ + struct ata_host *host = dev_get_drvdata(&pdev->dev); + struct pata_imx_priv *priv = host->private_data; + + ata_host_detach(host); + + __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); + + clk_disable(priv->clk); + clk_put(priv->clk); + + return 0; +} + +#ifdef CONFIG_PM +static int pata_imx_suspend(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct pata_imx_priv *priv = host->private_data; + int ret; + + ret = ata_host_suspend(host, PMSG_SUSPEND); + if (!ret) { + __raw_writel(0, priv->host_regs + PATA_IMX_ATA_INT_EN); + priv->ata_ctl = + __raw_readl(priv->host_regs + PATA_IMX_ATA_CONTROL); + clk_disable(priv->clk); + } + + return ret; +} + +static int pata_imx_resume(struct device *dev) +{ + struct ata_host *host = dev_get_drvdata(dev); + struct pata_imx_priv *priv = host->private_data; + + clk_enable(priv->clk); + + __raw_writel(priv->ata_ctl, priv->host_regs + PATA_IMX_ATA_CONTROL); + + __raw_writel(PATA_IMX_ATA_INTR_ATA_INTRQ2, + priv->host_regs + PATA_IMX_ATA_INT_EN); + + ata_host_resume(host); + + return 0; +} + +static const struct dev_pm_ops pata_imx_pm_ops = { + .suspend = pata_imx_suspend, + .resume = pata_imx_resume, +}; +#endif + +static struct platform_driver pata_imx_driver = { + .probe = pata_imx_probe, + .remove = __devexit_p(pata_imx_remove), + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &pata_imx_pm_ops, +#endif + }, +}; + +static int __init pata_imx_init(void) +{ + return platform_driver_register(&pata_imx_driver); +} + +static void __exit pata_imx_exit(void) +{ + platform_driver_unregister(&pata_imx_driver); +} +module_init(pata_imx_init); +module_exit(pata_imx_exit); + +MODULE_AUTHOR("Arnaud Patard "); +MODULE_DESCRIPTION("low-level driver for iMX PATA"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:" DRV_NAME); -- cgit v1.2.3 From a081da630d64acf132b2db1043c586b993d49da7 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 8 Aug 2011 13:17:57 +0200 Subject: drivers/ata/sata_dwc_460ex.c: add missing kfree Currently, error handling code in this function calls the function sata_dwc_port_stop, but this function has essentially no effect if hsdevp has not been stored in ap, which is the case throughout this function. The only effect is to print a debugging message including ap->print_id. The code is rewritten to not call sata_dwc_port_stop, but instead to jump to a local label that prints the original error message and the print_id information. In the case where hsdevp has been already allocated (but not yet stored in ap), this value is freed as well. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Signed-off-by: Jeff Garzik --- drivers/ata/sata_dwc_460ex.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/ata/sata_dwc_460ex.c b/drivers/ata/sata_dwc_460ex.c index 0a9a774a7e1e..5c4237452f50 100644 --- a/drivers/ata/sata_dwc_460ex.c +++ b/drivers/ata/sata_dwc_460ex.c @@ -1329,7 +1329,7 @@ static int sata_dwc_port_start(struct ata_port *ap) dev_err(ap->dev, "%s: dma_alloc_coherent failed\n", __func__); err = -ENOMEM; - goto CLEANUP; + goto CLEANUP_ALLOC; } } @@ -1349,15 +1349,13 @@ static int sata_dwc_port_start(struct ata_port *ap) /* Clear any error bits before libata starts issuing commands */ clear_serror(); ap->private_data = hsdevp; + dev_dbg(ap->dev, "%s: done\n", __func__); + return 0; +CLEANUP_ALLOC: + kfree(hsdevp); CLEANUP: - if (err) { - sata_dwc_port_stop(ap); - dev_dbg(ap->dev, "%s: fail\n", __func__); - } else { - dev_dbg(ap->dev, "%s: done\n", __func__); - } - + dev_dbg(ap->dev, "%s: fail. ap->id = %d\n", __func__, ap->print_id); return err; } -- cgit v1.2.3 From 4a0342ca8e8150bd47e7118a76e300692a1b6b7b Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 17 Aug 2011 22:14:57 +0000 Subject: sparc: fix array bounds error setting up PCIC NMI trap CC arch/sparc/kernel/pcic.o arch/sparc/kernel/pcic.c: In function 'pcic_probe': arch/sparc/kernel/pcic.c:359:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:359:8: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:360:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:360:8: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:361:33: error: array subscript is above array bounds [-Werror=array-bounds] arch/sparc/kernel/pcic.c:361:8: error: array subscript is above array bounds [-Werror=array-bounds] cc1: all warnings being treated as errors I'm not particularly familiar with sparc but t_nmi (defined in head_32.S via the TRAP_ENTRY macro) and pcic_nmi_trap_patch (defined in entry.S) both appear to be 4 instructions long and I presume from the usage that instructions are int sized. Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Reviewed-by: Sam Ravnborg Signed-off-by: David S. Miller --- arch/sparc/kernel/pcic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index a19f04195478..1aaf8c180be5 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -352,8 +352,8 @@ int __init pcic_probe(void) strcpy(pbm->prom_name, namebuf); { - extern volatile int t_nmi[1]; - extern int pcic_nmi_trap_patch[1]; + extern volatile int t_nmi[4]; + extern int pcic_nmi_trap_patch[4]; t_nmi[0] = pcic_nmi_trap_patch[0]; t_nmi[1] = pcic_nmi_trap_patch[1]; -- cgit v1.2.3 From 38b65190c6ab0be8ce7cff69e734ca5b5e7fa309 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 19 Aug 2011 07:55:10 +0200 Subject: ALSA: usb-audio - Fix missing mixer dB information The recent fix for testing dB range at the mixer creation time seems to cause regressions in some devices. In such devices, reading the dB info at probing time gives an error, thus both dBmin and dBmax are still zero, and TLV flag isn't set although the later read of dB info succeeds. This patch adds a workaround for such a case by assuming that the later read will succeed. In future, a similar test should be performed in a case where a wrong dB range is seen even in the later read. Signed-off-by: Takashi Iwai Cc: --- sound/usb/mixer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index c04d7c71ac88..cdd19d7fe500 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -152,6 +152,7 @@ static inline void check_mapped_dB(const struct usbmix_name_map *p, if (p && p->dB) { cval->dBmin = p->dB->min; cval->dBmax = p->dB->max; + cval->initialized = 1; } } @@ -1092,7 +1093,7 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc, " Switch" : " Volume"); if (control == UAC_FU_VOLUME) { check_mapped_dB(map, cval); - if (cval->dBmin < cval->dBmax) { + if (cval->dBmin < cval->dBmax || !cval->initialized) { kctl->tlv.c = mixer_vol_tlv; kctl->vd[0].access |= SNDRV_CTL_ELEM_ACCESS_TLV_READ | -- cgit v1.2.3 From b53d1ed734a2b9af8da115b836b658daa7d47a48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 19 Aug 2011 08:34:48 +0200 Subject: Revert "cfq: Remove special treatment for metadata rqs." We have a kernel build regression since 3.1-rc1, which is about 10% regression. The kernel source is in an ext3 filesystem. Alex Shi bisect it to commit: commit a07405b7802691d29ab3b23bdc76ee6d006aad0b Author: Justin TerAvest Date: Sun Jul 10 22:09:19 2011 +0200 cfq: Remove special treatment for metadata rqs. Apparently this is caused by lack metadata preemption, where ext3/ext4 do use READ_META. I didn't see a way to fix the issue, so suggest reverting the patch. This reverts commit a07405b7802691d29ab3b23bdc76ee6d006aad0b. Reported-by: Alex Shi Reported-by: Shaohua Li Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 650834537606..a33bd4377c61 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,6 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; + /* pending metadata requests */ + int meta_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) + return rq1->cmd_flags & REQ_META ? rq1 : rq2; + s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1607,6 +1612,10 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); + if (rq->cmd_flags & REQ_META) { + WARN_ON(!cfqq->meta_pending); + cfqq->meta_pending--; + } } static int cfq_merge(struct request_queue *q, struct request **req, @@ -3359,6 +3368,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, RB_EMPTY_ROOT(&cfqq->sort_list)) return true; + /* + * So both queues are sync. Let the new request get disk time if + * it's a metadata request and the current queue is doing regular IO. + */ + if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) + return true; + /* * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. */ @@ -3423,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; + if (rq->cmd_flags & REQ_META) + cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); -- cgit v1.2.3 From 6c58addca802950917765380257bebec0998a7da Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Wed, 17 Aug 2011 10:07:58 +0100 Subject: ARM: 7019/1: Footbridge: select CLKEVT_I8253 for ARCH_NETWINDER Since commit 8560a6cfc9818edde1fd8677961714b264ffa03d "arm: Footbridge: Use common i8253 clockevent", ARCH_NETWINDER needs to select CLKEVT_I8253. This patch fixes below build error with "make netwinder_defconfig". LD .tmp_vmlinux1 arch/arm/mach-footbridge/built-in.o: In function `isa_timer_init': isa-rtc.c:(.init.text+0x12c8): undefined reference to `clockevent_i8253_init' isa-rtc.c:(.init.text+0x12d0): undefined reference to `i8253_clockevent' arch/arm/mach-footbridge/built-in.o:(.data+0x198): undefined reference to `i8253_clockevent' make: *** [.tmp_vmlinux1] Error 1 Signed-off-by: Axel Lin Acked-by: Thomas Gleixner Signed-off-by: Russell King --- arch/arm/mach-footbridge/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index dc26fff22cf0..c8e7afcf14ec 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -62,6 +62,7 @@ config ARCH_EBSA285_HOST config ARCH_NETWINDER bool "NetWinder" select CLKSRC_I8253 + select CLKEVT_I8253 select FOOTBRIDGE_HOST select ISA select ISA_DMA -- cgit v1.2.3 From ac0d1516a2903226c19f3b4a4b323a9ffbede7d0 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Thu, 28 Jul 2011 08:16:34 +0900 Subject: ARM: S5P64X0: Replace irq_gc_ack() with irq_gc_ack_set_bit() According to commit 659fb32d1b67476f4ade25e9ea0e2642a5b9c4b5 ("replace irq_gc_ack() with {set,clr}_bit variants"), this should be fixed. Signed-off-by: Kukjin Kim --- arch/arm/mach-s5p64x0/irq-eint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s5p64x0/irq-eint.c b/arch/arm/mach-s5p64x0/irq-eint.c index 69ed4545112b..fe7380f5c3cd 100644 --- a/arch/arm/mach-s5p64x0/irq-eint.c +++ b/arch/arm/mach-s5p64x0/irq-eint.c @@ -129,7 +129,7 @@ static int s5p64x0_alloc_gc(void) } ct = gc->chip_types; - ct->chip.irq_ack = irq_gc_ack; + ct->chip.irq_ack = irq_gc_ack_set_bit; ct->chip.irq_mask = irq_gc_mask_set_bit; ct->chip.irq_unmask = irq_gc_mask_clr_bit; ct->chip.irq_set_type = s5p64x0_irq_eint_set_type; -- cgit v1.2.3 From b8a297d3f842f4f7dae98cf85701da069204b0b1 Mon Sep 17 00:00:00 2001 From: Kukjin Kim Date: Fri, 29 Jul 2011 10:23:45 +0900 Subject: ARM: SAMSUNG: Fix Section mismatch in samsung_bl_set() WARNING: vmlinux.o(.text+0xf47c): Section mismatch in reference from the function samsung_bl_set() to the (unknown reference) .init.data:(unknown) The function samsung_bl_set() references the (unknown reference) __initdata (unknown). This is often because samsung_bl_set lacks a __initdata annotation or the annotation of (unknown) is wrong. Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/include/plat/backlight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-samsung/include/plat/backlight.h b/arch/arm/plat-samsung/include/plat/backlight.h index 51d8da846a62..ad530c78fe8c 100644 --- a/arch/arm/plat-samsung/include/plat/backlight.h +++ b/arch/arm/plat-samsung/include/plat/backlight.h @@ -20,7 +20,7 @@ struct samsung_bl_gpio_info { int func; }; -extern void samsung_bl_set(struct samsung_bl_gpio_info *gpio_info, +extern void __init samsung_bl_set(struct samsung_bl_gpio_info *gpio_info, struct platform_pwm_backlight_data *bl_data); #endif /* __ASM_PLAT_BACKLIGHT_H */ -- cgit v1.2.3 From c1a238aadf32daf23db13617fc0b401080c9ab04 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 11 Aug 2011 16:36:41 +0900 Subject: ARM: EXYNOS4: Use the correct regulator names on universal_c210 Use the correct regulator names for cpufreq Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/mach-universal_c210.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-exynos4/mach-universal_c210.c b/arch/arm/mach-exynos4/mach-universal_c210.c index 0e280d12301e..b3b5d8911004 100644 --- a/arch/arm/mach-exynos4/mach-universal_c210.c +++ b/arch/arm/mach-exynos4/mach-universal_c210.c @@ -79,7 +79,7 @@ static struct s3c2410_uartcfg universal_uartcfgs[] __initdata = { }; static struct regulator_consumer_supply max8952_consumer = - REGULATOR_SUPPLY("vddarm", NULL); + REGULATOR_SUPPLY("vdd_arm", NULL); static struct max8952_platform_data universal_max8952_pdata __initdata = { .gpio_vid0 = EXYNOS4_GPX0(3), @@ -105,7 +105,7 @@ static struct max8952_platform_data universal_max8952_pdata __initdata = { }; static struct regulator_consumer_supply lp3974_buck1_consumer = - REGULATOR_SUPPLY("vddint", NULL); + REGULATOR_SUPPLY("vdd_int", NULL); static struct regulator_consumer_supply lp3974_buck2_consumer = REGULATOR_SUPPLY("vddg3d", NULL); -- cgit v1.2.3 From af8a9f63b45758591b8412d7ae3a0585227f09a2 Mon Sep 17 00:00:00 2001 From: Jonghwan Choi Date: Fri, 12 Aug 2011 18:15:42 +0900 Subject: ARM: EXYNOS4: Fix wrong devname to support clkdev Signed-off-by: Jonghwan Choi Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos4/clock.c b/arch/arm/mach-exynos4/clock.c index 851dea018578..1561b036a9bf 100644 --- a/arch/arm/mach-exynos4/clock.c +++ b/arch/arm/mach-exynos4/clock.c @@ -520,7 +520,7 @@ static struct clk init_clocks_off[] = { .ctrlbit = (1 << 21), }, { .name = "ac97", - .id = -1, + .devname = "samsung-ac97", .enable = exynos4_clk_ip_peril_ctrl, .ctrlbit = (1 << 27), }, { -- cgit v1.2.3 From 6b875cb741249ec274393ba3abb929beeb465d25 Mon Sep 17 00:00:00 2001 From: Huang Weiyi Date: Fri, 12 Aug 2011 18:43:57 +0900 Subject: ARM: EXYNOS4: remove duplicated inclusion Remove duplicated #include('s) in arch/arm/mach-exynos4/cpu.c Signed-off-by: Huang Weiyi Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/cpu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index 2d8a40c9e6e5..84032d3aecd9 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 5a1993f0c64f32cb4fb8a9f6caa981f377a11710 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki Date: Fri, 12 Aug 2011 19:03:16 +0900 Subject: ARM: EXYNOS4: Fix the IRQ definitions for MIPI CSIS device This is a regression fix after migration to the external GIC. The breakage has been introduced in commit 69644a8e23ab ("ARM: EXYNOS4: modify interrupt mappings for external GIC") Signed-off-by: Sylwester Nawrocki Signed-off-by: Kyungmin Park [kgene.kim@samsung.com: added commit id] Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/include/mach/irqs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-exynos4/include/mach/irqs.h b/arch/arm/mach-exynos4/include/mach/irqs.h index 934d2a493982..f8952f8f3757 100644 --- a/arch/arm/mach-exynos4/include/mach/irqs.h +++ b/arch/arm/mach-exynos4/include/mach/irqs.h @@ -80,9 +80,8 @@ #define IRQ_HSMMC3 IRQ_SPI(76) #define IRQ_DWMCI IRQ_SPI(77) -#define IRQ_MIPICSI0 IRQ_SPI(78) - -#define IRQ_MIPICSI1 IRQ_SPI(80) +#define IRQ_MIPI_CSIS0 IRQ_SPI(78) +#define IRQ_MIPI_CSIS1 IRQ_SPI(80) #define IRQ_ONENAND_AUDI IRQ_SPI(82) #define IRQ_ROTATOR IRQ_SPI(83) -- cgit v1.2.3 From 7e1291dea213c46b6649a9f6ec94b16f0d88f97c Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Sat, 13 Aug 2011 10:34:56 +0900 Subject: ARM: S5PV210: Fix build warning Fixed the following warning for S5PV210. arch/arm/mach-s5pv210/pm.c: In function 's5pv210_pm_add': arch/arm/mach-s5pv210/pm.c:139: warning: assignment from incompatible pointer type Also, staticized the function. Signed-off-by: Abhilash Kesavan Signed-off-by: Kukjin Kim --- arch/arm/mach-s5pv210/pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-s5pv210/pm.c b/arch/arm/mach-s5pv210/pm.c index 309e388a8a83..f149d278377b 100644 --- a/arch/arm/mach-s5pv210/pm.c +++ b/arch/arm/mach-s5pv210/pm.c @@ -88,7 +88,7 @@ static struct sleep_save s5pv210_core_save[] = { SAVE_ITEM(S3C2410_TCNTO(0)), }; -void s5pv210_cpu_suspend(unsigned long arg) +static int s5pv210_cpu_suspend(unsigned long arg) { unsigned long tmp; -- cgit v1.2.3 From f98d429d7a7ff43d6e7c9bab239223f44a85264e Mon Sep 17 00:00:00 2001 From: Abhilash Kesavan Date: Sat, 13 Aug 2011 10:40:52 +0900 Subject: ARM: S3C64XX: Fix build break in PM debug When S3C_PM_DEBUG_LED_SMDK is enabled for suspend/resume debugging, the following compilation error occurs: arch/arm/mach-s3c64xx/pm.c: In function 's3c_pm_debug_smdkled': arch/arm/mach-s3c64xx/pm.c:41: error: implicit declaration of function 'gpio_set_value' arch/arm/mach-s3c64xx/pm.c:41: error: implicit declaration of function 'S3C64XX_GPN' arch/arm/mach-s3c64xx/pm.c: In function 's3c64xx_pm_init': arch/arm/mach-s3c64xx/pm.c:184: error: implicit declaration of function 'gpio_request' arch/arm/mach-s3c64xx/pm.c:188: error: implicit declaration of function 'gpio_direction_output' Fix the error by including linux/gpio.h Signed-off-by: Abhilash Kesavan Signed-off-by: Kukjin Kim --- arch/arm/mach-s3c64xx/pm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-s3c64xx/pm.c b/arch/arm/mach-s3c64xx/pm.c index 8bad64370689..055e2858b0dd 100644 --- a/arch/arm/mach-s3c64xx/pm.c +++ b/arch/arm/mach-s3c64xx/pm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 995b528ad25968472742c50fe964d44fac2b857a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 18 Aug 2011 13:02:12 +0900 Subject: ARM: SAMSUNG: Add chained enrty/exit call to timer interrupt handler This patch adds chained IRQ enter/exit functions to timer interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/plat-samsung/irq-vic-timer.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/plat-samsung/irq-vic-timer.c b/arch/arm/plat-samsung/irq-vic-timer.c index f714d060370d..51583cd30164 100644 --- a/arch/arm/plat-samsung/irq-vic-timer.c +++ b/arch/arm/plat-samsung/irq-vic-timer.c @@ -22,9 +22,14 @@ #include #include +#include + static void s3c_irq_demux_vic_timer(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); generic_handle_irq((int)desc->irq_data.handler_data); + chained_irq_exit(chip, desc); } /* We assume the IRQ_TIMER0..IRQ_TIMER4 range is continuous. */ -- cgit v1.2.3 From 70b0e82bc7d03d33de5bceea92d419a9be4340ee Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Sat, 13 Aug 2011 12:55:36 +0900 Subject: ARM: EXYNOS4: add required chained_irq_enter/exit to eint code This patch adds chained IRQ enter/exit functions to external interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/irq-eint.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-exynos4/irq-eint.c b/arch/arm/mach-exynos4/irq-eint.c index 9d87d2ac7f68..badb8c66fc9b 100644 --- a/arch/arm/mach-exynos4/irq-eint.c +++ b/arch/arm/mach-exynos4/irq-eint.c @@ -23,6 +23,8 @@ #include +#include + static DEFINE_SPINLOCK(eint_lock); static unsigned int eint0_15_data[16]; @@ -184,8 +186,11 @@ static inline void exynos4_irq_demux_eint(unsigned int start) static void exynos4_irq_demux_eint16_31(unsigned int irq, struct irq_desc *desc) { + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); exynos4_irq_demux_eint(IRQ_EINT(16)); exynos4_irq_demux_eint(IRQ_EINT(24)); + chained_irq_exit(chip, desc); } static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) @@ -193,6 +198,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) u32 *irq_data = irq_get_handler_data(irq); struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); chip->irq_mask(&desc->irq_data); if (chip->irq_ack) @@ -201,6 +207,7 @@ static void exynos4_irq_eint0_15(unsigned int irq, struct irq_desc *desc) generic_handle_irq(*irq_data); chip->irq_unmask(&desc->irq_data); + chained_irq_exit(chip, desc); } int __init exynos4_init_irq_eint(void) -- cgit v1.2.3 From 3f6065dd9d2c947c8d68336f07bd721d3909a30d Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Sat, 13 Aug 2011 12:55:36 +0900 Subject: ARM: S5P: add required chained_irq_enter/exit to gpio-int code This patch adds chained IRQ enter/exit functions to gpio interrupt handler in order to function correctly on primary controllers with different methods of flow control. Signed-off-by: Marek Szyprowski Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/plat-s5p/irq-gpioint.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/plat-s5p/irq-gpioint.c b/arch/arm/plat-s5p/irq-gpioint.c index 327ab9f662e8..f71078ef6bb5 100644 --- a/arch/arm/plat-s5p/irq-gpioint.c +++ b/arch/arm/plat-s5p/irq-gpioint.c @@ -23,6 +23,8 @@ #include #include +#include + #define GPIO_BASE(chip) (((unsigned long)(chip)->base) & 0xFFFFF000u) #define CON_OFFSET 0x700 @@ -81,6 +83,9 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc) int group, pend_offset, mask_offset; unsigned int pend, mask; + struct irq_chip *chip = irq_get_chip(irq); + chained_irq_enter(chip, desc); + for (group = 0; group < bank->nr_groups; group++) { struct s3c_gpio_chip *chip = bank->chips[group]; if (!chip) @@ -102,6 +107,7 @@ static void s5p_gpioint_handler(unsigned int irq, struct irq_desc *desc) pend &= ~BIT(offset); } } + chained_irq_exit(chip, desc); } static __init int s5p_gpioint_add(struct s3c_gpio_chip *chip) -- cgit v1.2.3 From 2b431ff74a850db3d5b804be3ac466b6ed7f516d Mon Sep 17 00:00:00 2001 From: Yulgon Kim Date: Thu, 18 Aug 2011 20:40:24 +0900 Subject: ARM: EXYNOS4: Increase reset delay for USB HOST PHY This patch increases reset delay from 50 usec to 80 usec for USB HOST PHY. In order to reset USB HOST PHY controller properly, a little extra time is required during its reset cycle. Signed-off-by: Yulgon Kim Signed-off-by: Jingoo Han Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/setup-usb-phy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-exynos4/setup-usb-phy.c b/arch/arm/mach-exynos4/setup-usb-phy.c index 0883c1b824b9..39aca045f660 100644 --- a/arch/arm/mach-exynos4/setup-usb-phy.c +++ b/arch/arm/mach-exynos4/setup-usb-phy.c @@ -82,7 +82,7 @@ static int exynos4_usb_phy1_init(struct platform_device *pdev) rstcon &= ~(HOST_LINK_PORT_SWRST_MASK | PHY1_SWRST_MASK); writel(rstcon, EXYNOS4_RSTCON); - udelay(50); + udelay(80); clk_disable(otg_clk); clk_put(otg_clk); -- cgit v1.2.3 From d2edddf2b25863ec0893635662b0832f9965b543 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Fri, 19 Aug 2011 20:25:05 +0900 Subject: ARM: EXYNOS4: Add restart hook for proper reboot This is required to use SWRESET. Signed-off-by: Kyungmin Park Signed-off-by: Kukjin Kim --- arch/arm/mach-exynos4/cpu.c | 10 ++++++++++ arch/arm/mach-exynos4/include/mach/regs-pmu.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/mach-exynos4/cpu.c b/arch/arm/mach-exynos4/cpu.c index 84032d3aecd9..746d6fc6d397 100644 --- a/arch/arm/mach-exynos4/cpu.c +++ b/arch/arm/mach-exynos4/cpu.c @@ -27,8 +27,10 @@ #include #include #include +#include #include +#include extern int combiner_init(unsigned int combiner_nr, void __iomem *base, unsigned int irq_start); @@ -127,6 +129,11 @@ static void exynos4_idle(void) local_irq_enable(); } +static void exynos4_sw_reset(void) +{ + __raw_writel(0x1, S5P_SWRESET); +} + /* * exynos4_map_io * @@ -240,5 +247,8 @@ int __init exynos4_init(void) /* set idle function */ pm_idle = exynos4_idle; + /* set sw_reset function */ + s5p_reset_hook = exynos4_sw_reset; + return sysdev_register(&exynos4_sysdev); } diff --git a/arch/arm/mach-exynos4/include/mach/regs-pmu.h b/arch/arm/mach-exynos4/include/mach/regs-pmu.h index fa49bbb8e7b0..cdf9b47c303c 100644 --- a/arch/arm/mach-exynos4/include/mach/regs-pmu.h +++ b/arch/arm/mach-exynos4/include/mach/regs-pmu.h @@ -29,6 +29,8 @@ #define S5P_USE_STANDBY_WFE1 (1 << 25) #define S5P_USE_MASK ((0x3 << 16) | (0x3 << 24)) +#define S5P_SWRESET S5P_PMUREG(0x0400) + #define S5P_WAKEUP_STAT S5P_PMUREG(0x0600) #define S5P_EINT_WAKEUP_MASK S5P_PMUREG(0x0604) #define S5P_WAKEUP_MASK S5P_PMUREG(0x0608) -- cgit v1.2.3 From 5d747c6f2c9e1615685866251416268a0f648ffc Mon Sep 17 00:00:00 2001 From: Naveen Krishna Chatradhi Date: Fri, 19 Aug 2011 20:52:29 +0900 Subject: ARM: S5P: fix bug in spdif_clk_get_rate Should be passing the parent clk object when calling for parent rate. Signed-off-by: Naveen Krishna Chatradhi Signed-off-by: Kukjin Kim --- arch/arm/plat-s5p/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-s5p/clock.c b/arch/arm/plat-s5p/clock.c index 02af235298e2..5f84a3f13ef9 100644 --- a/arch/arm/plat-s5p/clock.c +++ b/arch/arm/plat-s5p/clock.c @@ -192,7 +192,7 @@ unsigned long s5p_spdif_get_rate(struct clk *clk) if (IS_ERR(pclk)) return -EINVAL; - rate = pclk->ops->get_rate(clk); + rate = pclk->ops->get_rate(pclk); clk_put(pclk); return rate; -- cgit v1.2.3 From bb0822954aab7d23a3f902c2a103ee0242f6046e Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 16 Aug 2011 13:37:14 -0600 Subject: squeeze max-pause area and drop pass-good area Revert the pass-good area introduced in ffd1f609ab10 ("writeback: introduce max-pause and pass-good dirty limits") and make the max-pause area smaller and safe. This fixes ~30% performance regression in the ext3 data=writeback fio_mmap_randwrite_64k/fio_mmap_randrw_64k test cases, where there are 12 JBOD disks, on each disk runs 8 concurrent tasks doing reads+writes. Using deadline scheduler also has a regression, but not that big as CFQ, so this suggests we have some write starvation. The test logs show that - the disks are sometimes under utilized - global dirty pages sometimes rush high to the pass-good area for several hundred seconds, while in the mean time some bdi dirty pages drop to very low value (bdi_dirty << bdi_thresh). Then suddenly the global dirty pages dropped under global dirty threshold and bdi_dirty rush very high (for example, 2 times higher than bdi_thresh). During which time balance_dirty_pages() is not called at all. So the problems are 1) The random writes progress so slow that they break the assumption of the max-pause logic that "8 pages per 200ms is typically more than enough to curb heavy dirtiers". 2) The max-pause logic ignored task_bdi_thresh and thus opens the possibility for some bdi's to over dirty pages, leading to (bdi_dirty >> bdi_thresh) and then (bdi_thresh >> bdi_dirty) for others. 3) The higher max-pause/pass-good thresholds somehow leads to the bad swing of dirty pages. The fix is to allow the task to slightly dirty over task_bdi_thresh, but no way to exceed bdi_dirty and/or global dirty_thresh. Tests show that it fixed the JBOD regression completely (both behavior and performance), while still being able to cut down large pause times in balance_dirty_pages() for single-disk cases. Reported-by: Li Shaohua Tested-by: Li Shaohua Acked-by: Jan Kara Signed-off-by: Wu Fengguang --- include/linux/writeback.h | 11 ----------- mm/page-writeback.c | 15 ++------------- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f1bfa12ea246..2b8963ff0f35 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -12,15 +12,6 @@ * * (thresh - thresh/DIRTY_FULL_SCOPE, thresh) * - * The 1/16 region above the global dirty limit will be put to maximum pauses: - * - * (limit, limit + limit/DIRTY_MAXPAUSE_AREA) - * - * The 1/16 region above the max-pause region, dirty exceeded bdi's will be put - * to loops: - * - * (limit + limit/DIRTY_MAXPAUSE_AREA, limit + limit/DIRTY_PASSGOOD_AREA) - * * Further beyond, all dirtier tasks will enter a loop waiting (possibly long * time) for the dirty pages to drop, unless written enough pages. * @@ -31,8 +22,6 @@ */ #define DIRTY_SCOPE 8 #define DIRTY_FULL_SCOPE (DIRTY_SCOPE / 2) -#define DIRTY_MAXPAUSE_AREA 16 -#define DIRTY_PASSGOOD_AREA 8 /* * 4MB minimal write chunk size diff --git a/mm/page-writeback.c b/mm/page-writeback.c index d1960744f881..0e309cd1b5b9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -754,21 +754,10 @@ static void balance_dirty_pages(struct address_space *mapping, * 200ms is typically more than enough to curb heavy dirtiers; * (b) the pause time limit makes the dirtiers more responsive. */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_MAXPAUSE_AREA && + if (nr_dirty < dirty_thresh && + bdi_dirty < (task_bdi_thresh + bdi_thresh) / 2 && time_after(jiffies, start_time + MAX_PAUSE)) break; - /* - * pass-good area. When some bdi gets blocked (eg. NFS server - * not responding), or write bandwidth dropped dramatically due - * to concurrent reads, or dirty threshold suddenly dropped and - * the dirty pages cannot be brought down anytime soon (eg. on - * slow USB stick), at least let go of the good bdi's. - */ - if (nr_dirty < dirty_thresh + - dirty_thresh / DIRTY_PASSGOOD_AREA && - bdi_dirty < bdi_thresh) - break; /* * Increase the delay for each loop, up to our previous -- cgit v1.2.3 From 63b37de12889b7b96463b7d6de6d3f3704486b91 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 16 Aug 2011 15:36:21 -0400 Subject: cpupower: fix Makefile typo Signed-off-by: Dave Jones Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 11521d2f0a4c..edb021c5f8cd 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -35,7 +35,7 @@ NLS ?= true # Set the following to 'true' to build/install the # cpufreq-bench benchmarking tool -CPUFRQ_BENCH ?= true +CPUFREQ_BENCH ?= true # Prefix to the directories we're installing to DESTDIR ?= @@ -139,7 +139,7 @@ ifeq ($(strip $(NLS)),true) COMPILE_NLS += create-gmo endif -ifeq ($(strip $(CPUFRQ_BENCH)),true) +ifeq ($(strip $(CPUFREQ_BENCH)),true) INSTALL_BENCH += install-bench COMPILE_BENCH += compile-bench endif -- cgit v1.2.3 From 47c336307a3680cfdf4adbe718d79f3fe66702ea Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 19 Aug 2011 17:00:02 +0200 Subject: cpupower: make NLS truly optional Loosely based on a patch for cpufrequtils, submittted by Sergey Dryabzhinsky and signed-off-by: Matt Turner Signed-off-by: Dominik Brodowski --- tools/power/cpupower/Makefile | 1 + tools/power/cpupower/utils/helpers/helpers.h | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index edb021c5f8cd..e8a03aceceb1 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -137,6 +137,7 @@ CFLAGS += -pipe ifeq ($(strip $(NLS)),true) INSTALL_NLS += install-gmo COMPILE_NLS += create-gmo + CFLAGS += -DNLS endif ifeq ($(strip $(CPUFREQ_BENCH)),true) diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h index 7a83022733b2..2747e738efb0 100644 --- a/tools/power/cpupower/utils/helpers/helpers.h +++ b/tools/power/cpupower/utils/helpers/helpers.h @@ -16,11 +16,20 @@ #include "helpers/bitmask.h" /* Internationalization ****************************/ +#ifdef NLS + #define _(String) gettext(String) #ifndef gettext_noop #define gettext_noop(String) String #endif #define N_(String) gettext_noop(String) + +#else /* !NLS */ + +#define _(String) String +#define N_(String) String + +#endif /* Internationalization ****************************/ extern int run_as_root; -- cgit v1.2.3 From 498ca793d90aef8ad38a852a969c257f62832738 Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Sat, 6 Aug 2011 18:11:43 +0200 Subject: cpupower: use man(1) when calling "cpupower help subcommand" Instead of printing something non-formatted to stdout, call man(1) to show the man page for the proper subcommand. Signed-off-by: Dominik Brodowski --- tools/power/cpupower/man/cpupower-frequency-info.1 | 6 +- tools/power/cpupower/man/cpupower-frequency-set.1 | 8 +- tools/power/cpupower/man/cpupower.1 | 14 ++-- tools/power/cpupower/utils/builtin.h | 7 -- tools/power/cpupower/utils/cpufreq-info.c | 42 +--------- tools/power/cpupower/utils/cpufreq-set.c | 29 +------ tools/power/cpupower/utils/cpuidle-info.c | 24 +----- tools/power/cpupower/utils/cpupower-info.c | 20 +---- tools/power/cpupower/utils/cpupower-set.c | 25 +----- tools/power/cpupower/utils/cpupower.c | 91 ++++++++++++---------- .../cpupower/utils/idle_monitor/cpupower-monitor.c | 48 ++++-------- 11 files changed, 86 insertions(+), 228 deletions(-) diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1 index 3194811d58f5..bb60a8d1e45a 100644 --- a/tools/power/cpupower/man/cpupower-frequency-info.1 +++ b/tools/power/cpupower/man/cpupower-frequency-info.1 @@ -1,10 +1,10 @@ -.TH "cpufreq-info" "1" "0.1" "Mattia Dongili" "" +.TH "cpupower-frequency-info" "1" "0.1" "Mattia Dongili" "" .SH "NAME" .LP -cpufreq\-info \- Utility to retrieve cpufreq kernel information +cpupower frequency\-info \- Utility to retrieve cpufreq kernel information .SH "SYNTAX" .LP -cpufreq\-info [\fIoptions\fP] +cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP] .SH "DESCRIPTION" .LP A small tool which prints out cpufreq information helpful to developers and interested users. diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1 index 26e3e13eee3b..685f469093ad 100644 --- a/tools/power/cpupower/man/cpupower-frequency-set.1 +++ b/tools/power/cpupower/man/cpupower-frequency-set.1 @@ -1,13 +1,13 @@ -.TH "cpufreq-set" "1" "0.1" "Mattia Dongili" "" +.TH "cpupower-freqency-set" "1" "0.1" "Mattia Dongili" "" .SH "NAME" .LP -cpufreq\-set \- A small tool which allows to modify cpufreq settings. +cpupower frequency\-set \- A small tool which allows to modify cpufreq settings. .SH "SYNTAX" .LP -cpufreq\-set [\fIoptions\fP] +cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP] .SH "DESCRIPTION" .LP -cpufreq\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time. +cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_set_speed" all the time. .SH "OPTIONS" .LP .TP diff --git a/tools/power/cpupower/man/cpupower.1 b/tools/power/cpupower/man/cpupower.1 index 78c20feab85c..baf741d06e82 100644 --- a/tools/power/cpupower/man/cpupower.1 +++ b/tools/power/cpupower/man/cpupower.1 @@ -3,7 +3,7 @@ cpupower \- Shows and sets processor power related values .SH SYNOPSIS .ft B -.B cpupower [ \-c cpulist ] subcommand [ARGS] +.B cpupower [ \-c cpulist ] [ARGS] .B cpupower \-v|\-\-version @@ -13,24 +13,24 @@ cpupower \- Shows and sets processor power related values \fBcpupower \fP is a collection of tools to examine and tune power saving related features of your processor. -The manpages of the subcommands (cpupower\-(1)) provide detailed +The manpages of the commands (cpupower\-(1)) provide detailed descriptions of supported features. Run \fBcpupower help\fP to get an overview -of supported subcommands. +of supported commands. .SH Options .PP \-\-help, \-h .RS 4 -Shows supported subcommands and general usage. +Shows supported commands and general usage. .RE .PP \-\-cpu cpulist, \-c cpulist .RS 4 Only show or set values for specific cores. -This option is not supported by all subcommands, details can be found in the -manpages of the subcommands. +This option is not supported by all commands, details can be found in the +manpages of the commands. -Some subcommands access all cores (typically the *\-set commands), some only +Some commands access all cores (typically the *\-set commands), some only the first core (typically the *\-info commands) by default. The syntax for is based on how the kernel exports CPU bitmasks via diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h index c870ffba5219..c10496fbe3c6 100644 --- a/tools/power/cpupower/utils/builtin.h +++ b/tools/power/cpupower/utils/builtin.h @@ -8,11 +8,4 @@ extern int cmd_freq_info(int argc, const char **argv); extern int cmd_idle_info(int argc, const char **argv); extern int cmd_monitor(int argc, const char **argv); -extern void set_help(void); -extern void info_help(void); -extern void freq_set_help(void); -extern void freq_info_help(void); -extern void idle_info_help(void); -extern void monitor_help(void); - #endif diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c index 5a1d25f056b3..28953c9a7bd5 100644 --- a/tools/power/cpupower/utils/cpufreq-info.c +++ b/tools/power/cpupower/utils/cpufreq-info.c @@ -510,37 +510,6 @@ static int get_latency(unsigned int cpu, unsigned int human) return 0; } -void freq_info_help(void) -{ - printf(_("Usage: cpupower freqinfo [options]\n")); - printf(_("Options:\n")); - printf(_(" -e, --debug Prints out debug information [default]\n")); - printf(_(" -f, --freq Get frequency the CPU currently runs at, according\n" - " to the cpufreq core *\n")); - printf(_(" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n" - " it from hardware (only available to root) *\n")); - printf(_(" -l, --hwlimits Determine the minimum and maximum CPU frequency allowed *\n")); - printf(_(" -d, --driver Determines the used cpufreq kernel driver *\n")); - printf(_(" -p, --policy Gets the currently used cpufreq policy *\n")); - printf(_(" -g, --governors Determines available cpufreq governors *\n")); - printf(_(" -r, --related-cpus Determines which CPUs run at the same hardware frequency *\n")); - printf(_(" -a, --affected-cpus Determines which CPUs need to have their frequency\n" - " coordinated by software *\n")); - printf(_(" -s, --stats Shows cpufreq statistics if available\n")); - printf(_(" -y, --latency Determines the maximum latency on CPU frequency changes *\n")); - printf(_(" -b, --boost Checks for turbo or boost modes *\n")); - printf(_(" -o, --proc Prints out information like provided by the /proc/cpufreq\n" - " interface in 2.4. and early 2.6. kernels\n")); - printf(_(" -m, --human human-readable output for the -f, -w, -s and -y parameters\n")); - printf(_(" -h, --help Prints out this screen\n")); - - printf("\n"); - printf(_("If no argument is given, full output about\n" - "cpufreq is printed which is useful e.g. for reporting bugs.\n\n")); - printf(_("By default info of CPU 0 is shown which can be overridden\n" - "with the cpupower --cpu main command option.\n")); -} - static struct option info_opts[] = { { .name = "debug", .has_arg = no_argument, .flag = NULL, .val = 'e'}, { .name = "boost", .has_arg = no_argument, .flag = NULL, .val = 'b'}, @@ -556,7 +525,6 @@ static struct option info_opts[] = { { .name = "latency", .has_arg = no_argument, .flag = NULL, .val = 'y'}, { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, { .name = "human", .has_arg = no_argument, .flag = NULL, .val = 'm'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; @@ -570,16 +538,12 @@ int cmd_freq_info(int argc, char **argv) int output_param = 0; do { - ret = getopt_long(argc, argv, "hoefwldpgrasmyb", info_opts, NULL); + ret = getopt_long(argc, argv, "oefwldpgrasmyb", info_opts, NULL); switch (ret) { case '?': output_param = '?'; cont = 0; break; - case 'h': - output_param = 'h'; - cont = 0; - break; case -1: cont = 0; break; @@ -642,11 +606,7 @@ int cmd_freq_info(int argc, char **argv) return -EINVAL; case '?': printf(_("invalid or unknown argument\n")); - freq_info_help(); return -EINVAL; - case 'h': - freq_info_help(); - return EXIT_SUCCESS; case 'o': proc_cpufreq_output(); return EXIT_SUCCESS; diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c index 5f783622bf31..dd1539eb8c63 100644 --- a/tools/power/cpupower/utils/cpufreq-set.c +++ b/tools/power/cpupower/utils/cpufreq-set.c @@ -20,34 +20,11 @@ #define NORM_FREQ_LEN 32 -void freq_set_help(void) -{ - printf(_("Usage: cpupower frequency-set [options]\n")); - printf(_("Options:\n")); - printf(_(" -d FREQ, --min FREQ new minimum CPU frequency the governor may select\n")); - printf(_(" -u FREQ, --max FREQ new maximum CPU frequency the governor may select\n")); - printf(_(" -g GOV, --governor GOV new cpufreq governor\n")); - printf(_(" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n" - " governor to be available and loaded\n")); - printf(_(" -r, --related Switches all hardware-related CPUs\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf("\n"); - printf(_("Notes:\n" - "1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n")); - printf(_("2. The -f FREQ, --freq FREQ parameter cannot be combined with any other parameter\n" - " except the -c CPU, --cpu CPU parameter\n" - "3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n" - " by postfixing the value with the wanted unit name, without any space\n" - " (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n")); - -} - static struct option set_opts[] = { { .name = "min", .has_arg = required_argument, .flag = NULL, .val = 'd'}, { .name = "max", .has_arg = required_argument, .flag = NULL, .val = 'u'}, { .name = "governor", .has_arg = required_argument, .flag = NULL, .val = 'g'}, { .name = "freq", .has_arg = required_argument, .flag = NULL, .val = 'f'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { .name = "related", .has_arg = no_argument, .flag = NULL, .val='r'}, { }, }; @@ -80,7 +57,6 @@ const struct freq_units def_units[] = { static void print_unknown_arg(void) { printf(_("invalid or unknown argument\n")); - freq_set_help(); } static unsigned long string_to_frequency(const char *str) @@ -231,14 +207,11 @@ int cmd_freq_set(int argc, char **argv) /* parameter parsing */ do { - ret = getopt_long(argc, argv, "d:u:g:f:hr", set_opts, NULL); + ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL); switch (ret) { case '?': print_unknown_arg(); return -EINVAL; - case 'h': - freq_set_help(); - return 0; case -1: cont = 0; break; diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 70da3574f1e9..b028267c1376 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -139,30 +139,14 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) } } -/* --freq / -f */ - -void idle_info_help(void) -{ - printf(_ ("Usage: cpupower idleinfo [options]\n")); - printf(_ ("Options:\n")); - printf(_ (" -s, --silent Only show general C-state information\n")); - printf(_ (" -o, --proc Prints out information like provided by the /proc/acpi/processor/*/power\n" - " interface in older kernels\n")); - printf(_ (" -h, --help Prints out this screen\n")); - - printf("\n"); -} - static struct option info_opts[] = { { .name = "silent", .has_arg = no_argument, .flag = NULL, .val = 's'}, { .name = "proc", .has_arg = no_argument, .flag = NULL, .val = 'o'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static inline void cpuidle_exit(int fail) { - idle_info_help(); exit(EXIT_FAILURE); } @@ -174,7 +158,7 @@ int cmd_idle_info(int argc, char **argv) unsigned int cpu = 0; do { - ret = getopt_long(argc, argv, "hos", info_opts, NULL); + ret = getopt_long(argc, argv, "os", info_opts, NULL); if (ret == -1) break; switch (ret) { @@ -182,10 +166,6 @@ int cmd_idle_info(int argc, char **argv) output_param = '?'; cont = 0; break; - case 'h': - output_param = 'h'; - cont = 0; - break; case 's': verbose = 0; break; @@ -211,8 +191,6 @@ int cmd_idle_info(int argc, char **argv) case '?': printf(_("invalid or unknown argument\n")); cpuidle_exit(EXIT_FAILURE); - case 'h': - cpuidle_exit(EXIT_SUCCESS); } /* Default is: show output of CPU 0 only */ diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c index 85253cb7600e..3f68632c28c7 100644 --- a/tools/power/cpupower/utils/cpupower-info.c +++ b/tools/power/cpupower/utils/cpupower-info.c @@ -16,31 +16,16 @@ #include "helpers/helpers.h" #include "helpers/sysfs.h" -void info_help(void) -{ - printf(_("Usage: cpupower info [ -b ] [ -m ] [ -s ]\n")); - printf(_("Options:\n")); - printf(_(" -b, --perf-bias Gets CPU's power vs performance policy on some\n" - " Intel models [0-15], see manpage for details\n")); - printf(_(" -m, --sched-mc Gets the kernel's multi core scheduler policy.\n")); - printf(_(" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf(_("\nPassing no option will show all info, by default only on core 0\n")); - printf("\n"); -} - static struct option set_opts[] = { { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, { .name = "sched-mc", .has_arg = optional_argument, .flag = NULL, .val = 'm'}, { .name = "sched-smt", .has_arg = optional_argument, .flag = NULL, .val = 's'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static void print_wrong_arg_exit(void) { printf(_("invalid or unknown argument\n")); - info_help(); exit(EXIT_FAILURE); } @@ -64,11 +49,8 @@ int cmd_info(int argc, char **argv) textdomain(PACKAGE); /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "msbh", set_opts, NULL)) != -1) { + while ((ret = getopt_long(argc, argv, "msb", set_opts, NULL)) != -1) { switch (ret) { - case 'h': - info_help(); - return 0; case 'b': if (params.perf_bias) print_wrong_arg_exit(); diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c index bc1b391e46f0..dc4de3762111 100644 --- a/tools/power/cpupower/utils/cpupower-set.c +++ b/tools/power/cpupower/utils/cpupower-set.c @@ -17,30 +17,16 @@ #include "helpers/sysfs.h" #include "helpers/bitmask.h" -void set_help(void) -{ - printf(_("Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n")); - printf(_("Options:\n")); - printf(_(" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n" - " Intel models [0-15], see manpage for details\n")); - printf(_(" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n")); - printf(_(" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler policy.\n")); - printf(_(" -h, --help Prints out this screen\n")); - printf("\n"); -} - static struct option set_opts[] = { { .name = "perf-bias", .has_arg = optional_argument, .flag = NULL, .val = 'b'}, { .name = "sched-mc", .has_arg = optional_argument, .flag = NULL, .val = 'm'}, { .name = "sched-smt", .has_arg = optional_argument, .flag = NULL, .val = 's'}, - { .name = "help", .has_arg = no_argument, .flag = NULL, .val = 'h'}, { }, }; static void print_wrong_arg_exit(void) { printf(_("invalid or unknown argument\n")); - set_help(); exit(EXIT_FAILURE); } @@ -66,12 +52,9 @@ int cmd_set(int argc, char **argv) params.params = 0; /* parameter parsing */ - while ((ret = getopt_long(argc, argv, "m:s:b:h", + while ((ret = getopt_long(argc, argv, "m:s:b:", set_opts, NULL)) != -1) { switch (ret) { - case 'h': - set_help(); - return 0; case 'b': if (params.perf_bias) print_wrong_arg_exit(); @@ -110,10 +93,8 @@ int cmd_set(int argc, char **argv) } }; - if (!params.params) { - set_help(); - return -EINVAL; - } + if (!params.params) + print_wrong_arg_exit(); if (params.sched_mc) { ret = sysfs_set_sched("mc", sched_mc); diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c index 5844ae0f786f..52bee591c1c5 100644 --- a/tools/power/cpupower/utils/cpupower.c +++ b/tools/power/cpupower/utils/cpupower.c @@ -11,6 +11,7 @@ #include #include #include +#include #include "builtin.h" #include "helpers/helpers.h" @@ -19,13 +20,12 @@ struct cmd_struct { const char *cmd; int (*main)(int, const char **); - void (*usage)(void); int needs_root; }; #define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) -int cmd_help(int argc, const char **argv); +static int cmd_help(int argc, const char **argv); /* Global cpu_info object available for all binaries * Info only retrieved from CPU 0 @@ -44,55 +44,66 @@ int be_verbose; static void print_help(void); static struct cmd_struct commands[] = { - { "frequency-info", cmd_freq_info, freq_info_help, 0 }, - { "frequency-set", cmd_freq_set, freq_set_help, 1 }, - { "idle-info", cmd_idle_info, idle_info_help, 0 }, - { "set", cmd_set, set_help, 1 }, - { "info", cmd_info, info_help, 0 }, - { "monitor", cmd_monitor, monitor_help, 0 }, - { "help", cmd_help, print_help, 0 }, - /* { "bench", cmd_bench, NULL, 1 }, */ + { "frequency-info", cmd_freq_info, 0 }, + { "frequency-set", cmd_freq_set, 1 }, + { "idle-info", cmd_idle_info, 0 }, + { "set", cmd_set, 1 }, + { "info", cmd_info, 0 }, + { "monitor", cmd_monitor, 0 }, + { "help", cmd_help, 0 }, + /* { "bench", cmd_bench, 1 }, */ }; -int cmd_help(int argc, const char **argv) -{ - unsigned int i; - - if (argc > 1) { - for (i = 0; i < ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands + i; - if (strcmp(p->cmd, argv[1])) - continue; - if (p->usage) { - p->usage(); - return EXIT_SUCCESS; - } - } - } - print_help(); - if (argc == 1) - return EXIT_SUCCESS; /* cpupower help */ - return EXIT_FAILURE; -} - static void print_help(void) { unsigned int i; #ifdef DEBUG - printf(_("cpupower [ -d ][ -c cpulist ] subcommand [ARGS]\n")); - printf(_(" -d, --debug May increase output (stderr) on some subcommands\n")); + printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] []\n")); #else - printf(_("cpupower [ -c cpulist ] subcommand [ARGS]\n")); + printf(_("Usage:\tcpupower [-c|--cpu cpulist ] []\n")); #endif - printf(_("cpupower --version\n")); - printf(_("Supported subcommands are:\n")); + printf(_("Supported commands are:\n")); for (i = 0; i < ARRAY_SIZE(commands); i++) printf("\t%s\n", commands[i].cmd); - printf(_("\nSome subcommands can make use of the -c cpulist option.\n")); - printf(_("Look at the general cpupower manpage how to use it\n")); - printf(_("and read up the subcommand's manpage whether it is supported.\n")); - printf(_("\nUse cpupower help subcommand for getting help for above subcommands.\n")); + printf(_("\nNot all commands can make use of the -c cpulist option.\n")); + printf(_("\nUse 'cpupower help ' for getting help for above commands.\n")); +} + +static int print_man_page(const char *subpage) +{ + int len; + char *page; + + len = 10; /* enough for "cpupower-" */ + if (subpage != NULL) + len += strlen(subpage); + + page = malloc(len); + if (!page) + return -ENOMEM; + + sprintf(page, "cpupower"); + if ((subpage != NULL) && strcmp(subpage, "help")) { + strcat(page, "-"); + strcat(page, subpage); + } + + execlp("man", "man", page, NULL); + + /* should not be reached */ + return -EINVAL; +} + +static int cmd_help(int argc, const char **argv) +{ + if (argc > 1) { + print_man_page(argv[1]); /* exits within execlp() */ + return EXIT_FAILURE; + } + + print_help(); + return EXIT_SUCCESS; } static void print_version(void) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index 6cb8d9e6bb6b..0d6571e418db 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -43,6 +43,12 @@ static struct cpupower_topology cpu_top; /* ToDo: Document this in the manpage */ static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', }; +static void print_wrong_arg_exit(void) +{ + printf(_("invalid or unknown argument\n")); + exit(EXIT_FAILURE); +} + long long timespec_diff_us(struct timespec start, struct timespec end) { struct timespec temp; @@ -56,21 +62,6 @@ long long timespec_diff_us(struct timespec start, struct timespec end) return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000); } -void monitor_help(void) -{ - printf(_("cpupower monitor: [-m ,[],.. ] command\n")); - printf(_("cpupower monitor: [-m ,[],.. ] [ -i interval_sec ]\n")); - printf(_("cpupower monitor: -l\n")); - printf(_("\t command: pass an arbitrary command to measure specific workload\n")); - printf(_("\t -i: time intervall to measure for in seconds (default 1)\n")); - printf(_("\t -l: list available CPU sleep monitors (for use with -m)\n")); - printf(_("\t -m: show specific CPU sleep monitors only (in same order)\n")); - printf(_("\t -h: print this help\n")); - printf("\n"); - printf(_("only one of: -l, -m are allowed\nIf none of them is passed,")); - printf(_(" all supported monitors are shown\n")); -} - void print_n_spaces(int n) { int x; @@ -246,7 +237,6 @@ static void parse_monitor_param(char *param) if (hits == 0) { printf(_("No matching monitor found in %s, " "try -l option\n"), param); - monitor_help(); exit(EXIT_FAILURE); } /* Override detected/registerd monitors array with requested one */ @@ -343,37 +333,27 @@ static void cmdline(int argc, char *argv[]) int opt; progname = basename(argv[0]); - while ((opt = getopt(argc, argv, "+hli:m:")) != -1) { + while ((opt = getopt(argc, argv, "+li:m:")) != -1) { switch (opt) { - case 'h': - monitor_help(); - exit(EXIT_SUCCESS); case 'l': - if (mode) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode) + print_wrong_arg_exit(); mode = list; break; case 'i': /* only allow -i with -m or no option */ - if (mode && mode != show) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode && mode != show) + print_wrong_arg_exit(); interval = atoi(optarg); break; case 'm': - if (mode) { - monitor_help(); - exit(EXIT_FAILURE); - } + if (mode) + print_wrong_arg_exit(); mode = show; show_monitors_param = optarg; break; default: - monitor_help(); - exit(EXIT_FAILURE); + print_wrong_arg_exit(); } } if (!mode) -- cgit v1.2.3 From 69566dd8be42dea7a22f625abc96e65bb4b45d1f Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2011 11:24:37 -0700 Subject: PCI: OF: Don't crash when bridge parent is NULL. In pcibios_get_phb_of_node(), we will crash while booting if bus->bridge->parent is NULL. Check for this case and avoid dereferencing the NULL pointer. Signed-off-by: David Daney Acked-by: Benjamin Herrenschmidt Acked-by: Grant Likely Signed-off-by: Jesse Barnes --- drivers/pci/of.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/of.c b/drivers/pci/of.c index c94d37ec55c8..f0929934bb7a 100644 --- a/drivers/pci/of.c +++ b/drivers/pci/of.c @@ -55,7 +55,7 @@ struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) */ if (bus->bridge->of_node) return of_node_get(bus->bridge->of_node); - if (bus->bridge->parent->of_node) + if (bus->bridge->parent && bus->bridge->parent->of_node) return of_node_get(bus->bridge->parent->of_node); return NULL; } -- cgit v1.2.3 From 016f1c54408b1e92e2e8087bfc05ca0a9c258513 Mon Sep 17 00:00:00 2001 From: Michal Marek Date: Thu, 11 Aug 2011 12:29:46 +0200 Subject: UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled With $ grep -e UBIFS_FS_DEBUG -e DYNAMIC_DEBUG .config # CONFIG_UBIFS_FS_DEBUG is not set CONFIG_DYNAMIC_DEBUG=y Debug messages are kept in the object files due to the dynamic_pr_debug() macro, even if they are never going to be printed: $ make fs/ubifs/super.o $ strings fs/ubifs/super.o | grep 'compiled on' compiled on: Aug 11 2011 at 12:21:38 Use plain printk to fix this. Signed-off-by: Michal Marek Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 45174b534377..feb361e252ac 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -335,9 +335,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -#define ubifs_dbg_msg(fmt, ...) do { \ - if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ +#define ubifs_dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_dump_stack() -- cgit v1.2.3 From 9efabc84768ee8e79b50ad6ad6cff94d66da01f7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 19 Aug 2011 19:02:27 +0300 Subject: UBI: do not link debug messages when debugging is disabled Michal Marek spotted the same issue in UBIFS and this patch fixes UBI, see "UBIFS: not build debug messages with CONFIG_UBIFS_FS_DEBUG disabled" When UBI debugging is disabled, we have debugging messages defined as: if (0) pr_debug() But pr_debug macro defines data structures with debugging data and makes the linux binary larger, even though we have "if (0)". Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/debug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/ubi/debug.h b/drivers/mtd/ubi/debug.h index 65b5b76cc379..64fbb0021825 100644 --- a/drivers/mtd/ubi/debug.h +++ b/drivers/mtd/ubi/debug.h @@ -181,7 +181,7 @@ static inline int ubi_dbg_is_erase_failure(const struct ubi_device *ubi) #define ubi_dbg_msg(fmt, ...) do { \ if (0) \ - pr_debug(fmt "\n", ##__VA_ARGS__); \ + printk(KERN_DEBUG fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) -- cgit v1.2.3 From d555ab6bb321814853ca8a8d4e8e22d52e18a871 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 29 Jul 2011 21:11:43 -0700 Subject: max8998_charger: Needs module.h power/max8998_charger.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. Signed-off-by: Randy Dunlap Signed-off-by: Anton Vorontsov --- drivers/power/max8998_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/max8998_charger.c b/drivers/power/max8998_charger.c index cc21fa2120be..ef8efadb58cb 100644 --- a/drivers/power/max8998_charger.c +++ b/drivers/power/max8998_charger.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include -- cgit v1.2.3 From 71aa79a8c2537eb07cd26b5e4dc43274a9c10692 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Aug 2011 07:29:31 +0800 Subject: max8997_charger: Needs module.h power/max8997_charger.c uses interfaces from linux/module.h, so it should include that file. This fixes build errors. Signed-off-by: Axel Lin Acked-by: MyungJoo Ham Signed-off-by: Anton Vorontsov --- drivers/power/max8997_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/max8997_charger.c b/drivers/power/max8997_charger.c index 7106b49b26e4..ffc5033ea9c9 100644 --- a/drivers/power/max8997_charger.c +++ b/drivers/power/max8997_charger.c @@ -20,6 +20,7 @@ */ #include +#include #include #include #include -- cgit v1.2.3 From 815efa1eab5b0c3e071e5d6df0cc2d7e0c7e6fd7 Mon Sep 17 00:00:00 2001 From: Vasily Khoruzhick Date: Fri, 12 Aug 2011 17:55:18 +0300 Subject: s3c-adc-battery: Fix compilation error due to missing header (module.h) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add linux/module.h to fix this compilation error: drivers/power/s3c_adc_battery.c:435:15: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:435:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:435:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_AUTHOR’ drivers/power/s3c_adc_battery.c:435:15: warning: function declaration isn’t a prototype drivers/power/s3c_adc_battery.c:436:20: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:436:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:436:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_DESCRIPTION’ drivers/power/s3c_adc_battery.c:436:20: warning: function declaration isn’t a prototype drivers/power/s3c_adc_battery.c:437:16: error: expected declaration specifiers or ‘...’ before string constant drivers/power/s3c_adc_battery.c:437:1: warning: data definition has no type or storage class drivers/power/s3c_adc_battery.c:437:1: warning: type defaults to ‘int’ in declaration of ‘MODULE_LICENSE’ drivers/power/s3c_adc_battery.c:437:16: warning: function declaration isn’t a prototype make[2]: *** [drivers/power/s3c_adc_battery.o] Error 1 Signed-off-by: Vasily Khoruzhick Signed-off-by: Ian Lartey Signed-off-by: Anton Vorontsov --- drivers/power/s3c_adc_battery.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/s3c_adc_battery.c b/drivers/power/s3c_adc_battery.c index a675e31b4f13..d32d0d70f9ba 100644 --- a/drivers/power/s3c_adc_battery.c +++ b/drivers/power/s3c_adc_battery.c @@ -20,6 +20,7 @@ #include #include #include +#include #include -- cgit v1.2.3 From b095cd0a0ccdbc00c9fd99d90b22f8563687971f Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 12 Aug 2011 15:28:32 -0700 Subject: drm/i915: set GFX_MODE to pre-Ivybridge default value even on Ivybridge Prior to Ivybridge, the GFX_MODE would default to 0x800, meaning that MI_FLUSH would flush the TLBs in addition to the rest of the caches indicated in the MI_FLUSH command. However starting with Ivybridge, the register defaults to 0x2800 out of reset, meaning that to invalidate the TLB we need to use PIPE_CONTROL. Since we're not doing that yet, go back to the old default so things work. v2: don't forget to actually *clear* the new bit Reviewed-by: Eric Anholt Reviewed-by: Chris Wilson Tested-by: Kenneth Graunke Signed-off-by: Jesse Barnes --- drivers/gpu/drm/i915/i915_reg.h | 4 ++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5baaef4a0c5d..542453f7498c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -375,6 +375,7 @@ # define MI_FLUSH_ENABLE (1 << 11) #define GFX_MODE 0x02520 +#define GFX_MODE_GEN7 0x0229c #define GFX_RUN_LIST_ENABLE (1<<15) #define GFX_TLB_INVALIDATE_ALWAYS (1<<13) #define GFX_SURFACE_FAULT_ENABLE (1<<12) @@ -382,6 +383,9 @@ #define GFX_PSMI_GRANULARITY (1<<10) #define GFX_PPGTT_ENABLE (1<<9) +#define GFX_MODE_ENABLE(bit) (((bit) << 16) | (bit)) +#define GFX_MODE_DISABLE(bit) (((bit) << 16) | (0)) + #define SCPD0 0x0209c /* 915+ only */ #define IER 0x020a0 #define IIR 0x020a4 diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 47b9b2777038..c30626ea9f93 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -290,6 +290,10 @@ static int init_render_ring(struct intel_ring_buffer *ring) if (IS_GEN6(dev) || IS_GEN7(dev)) mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; I915_WRITE(MI_MODE, mode); + if (IS_GEN7(dev)) + I915_WRITE(GFX_MODE_GEN7, + GFX_MODE_DISABLE(GFX_TLB_INVALIDATE_ALWAYS) | + GFX_MODE_ENABLE(GFX_REPLAY_MODE)); } if (INTEL_INFO(dev)->gen >= 6) { -- cgit v1.2.3 From c956b753e706f24d18a026f8efa4df3b1919fcc9 Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Fri, 19 Aug 2011 16:59:39 -0600 Subject: OMAP: powerdomains: Make all powerdomain target states as ON at init Program all powerdomain target state as ON; this is to prevent domains from hitting low power states (if bootloader has target states set to something other than ON) and potentially even losing context while PM is not fully initialized, which can cause the system to crash. The PM late init code can then program the desired target state for all the power domains. Signed-off-by: Rajendra Nayak Signed-off-by: Santosh Shilimkar [paul@pwsan.com: dropped comment typo hunk; fixed comment indent and moved to kerneldoc; moved code to pwrdm_init(); changed pwrdm_init() argument name to prevent clash; cleaned up patch description] Signed-off-by: Paul Walmsley --- arch/arm/mach-omap2/powerdomain.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 9af08473bf10..ef71fdd40fc4 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -195,28 +195,35 @@ static int _pwrdm_post_transition_cb(struct powerdomain *pwrdm, void *unused) /** * pwrdm_init - set up the powerdomain layer - * @pwrdm_list: array of struct powerdomain pointers to register + * @pwrdms: array of struct powerdomain pointers to register * @custom_funcs: func pointers for arch specific implementations * - * Loop through the array of powerdomains @pwrdm_list, registering all - * that are available on the current CPU. If pwrdm_list is supplied - * and not null, all of the referenced powerdomains will be - * registered. No return value. XXX pwrdm_list is not really a - * "list"; it is an array. Rename appropriately. + * Loop through the array of powerdomains @pwrdms, registering all + * that are available on the current CPU. Also, program all + * powerdomain target state as ON; this is to prevent domains from + * hitting low power states (if bootloader has target states set to + * something other than ON) and potentially even losing context while + * PM is not fully initialized. The PM late init code can then program + * the desired target state for all the power domains. No return + * value. */ -void pwrdm_init(struct powerdomain **pwrdm_list, struct pwrdm_ops *custom_funcs) +void pwrdm_init(struct powerdomain **pwrdms, struct pwrdm_ops *custom_funcs) { struct powerdomain **p = NULL; + struct powerdomain *temp_p; if (!custom_funcs) WARN(1, "powerdomain: No custom pwrdm functions registered\n"); else arch_pwrdm = custom_funcs; - if (pwrdm_list) { - for (p = pwrdm_list; *p; p++) + if (pwrdms) { + for (p = pwrdms; *p; p++) _pwrdm_register(*p); } + + list_for_each_entry(temp_p, &pwrdm_list, node) + pwrdm_set_next_pwrst(temp_p, PWRDM_POWER_ON); } /** -- cgit v1.2.3 From b1cbdb00da2ac00eb67fe277e563ff1f5093b4ba Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Fri, 19 Aug 2011 16:59:39 -0600 Subject: OMAP: clockdomain: Wait for powerdomain to be ON when using clockdomain force wakeup While using clockdomain force wakeup method, not waiting for powerdomain to be effectively ON may end up locking the clockdomain FSM until a next wakeup event occurs. One such issue was seen on OMAP4430, where L4_PER was periodically getting stuck in in-transition state when transitioning from from OSWR to ON. This issue was reported and investigated by Patrick Titiano Signed-off-by: Santosh Shilimkar Signed-off-by: Rajendra Nayak Reported-by: Patrick Titiano Cc: Kevin Hilman Cc: Benoit Cousson Cc: Paul Walmsley [paul@pwsan.com: updated to apply; added transition wait on clkdm_deny_idle(); remove two superfluous pwrdm_wait_transition() calls] Signed-off-by: Paul Walmsley --- arch/arm/mach-omap2/clockdomain.c | 2 ++ arch/arm/mach-omap2/pm.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-omap2/clockdomain.c b/arch/arm/mach-omap2/clockdomain.c index ab7db083f97f..8f0890685d7b 100644 --- a/arch/arm/mach-omap2/clockdomain.c +++ b/arch/arm/mach-omap2/clockdomain.c @@ -747,6 +747,7 @@ int clkdm_wakeup(struct clockdomain *clkdm) spin_lock_irqsave(&clkdm->lock, flags); clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED; ret = arch_clkdm->clkdm_wakeup(clkdm); + ret |= pwrdm_state_switch(clkdm->pwrdm.ptr); spin_unlock_irqrestore(&clkdm->lock, flags); return ret; } @@ -818,6 +819,7 @@ void clkdm_deny_idle(struct clockdomain *clkdm) spin_lock_irqsave(&clkdm->lock, flags); clkdm->_flags &= ~_CLKDM_FLAG_HWSUP_ENABLED; arch_clkdm->clkdm_deny_idle(clkdm); + pwrdm_state_switch(clkdm->pwrdm.ptr); spin_unlock_irqrestore(&clkdm->lock, flags); } diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 3feb35911a32..472bf22d5e84 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -130,7 +130,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state) } else { hwsup = clkdm_in_hwsup(pwrdm->pwrdm_clkdms[0]); clkdm_wakeup(pwrdm->pwrdm_clkdms[0]); - pwrdm_wait_transition(pwrdm); sleep_switch = FORCEWAKEUP_SWITCH; } } @@ -156,7 +155,6 @@ int omap_set_pwrdm_state(struct powerdomain *pwrdm, u32 state) return ret; } - pwrdm_wait_transition(pwrdm); pwrdm_state_switch(pwrdm); err: return ret; -- cgit v1.2.3 From 9c5f560173a466582d91bb06f4e3d2bafb0fee5c Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Fri, 19 Aug 2011 16:59:56 -0600 Subject: OMAP4: clock: re-enable previous clockdomain enable/disable sequence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After commit 665d001338b494d6d62810aa99b4c0fa1a0884b9 ("OMAP2+: hwmod: Follow the recommended PRCM module enable sequence"), device drivers for OMAP IP blocks that do not use runtime PM can cause oopses or kernel instability[1][2]. This is because those non-runtime PM drivers do not use the hwmod code, which implements the correct IP block enable and disable sequence. Several options for dealing with this problem have been proposed: 1. Add a new field to the OMAP struct clk to mark clocks that are currently used by non-runtime PM drivers. Modify the clock code to use the old clockdomain sequence for these marked clocks. As drivers are converted to use runtime PM, remove the annotation from the clocks. 2. Similar to #1, but associate the flag with the struct omap_clk instead. 3. Add IDLEST wait support to the OMAP4 clock code, similar to the way it is implemented for OMAP2/3, and enable it in each struct clk currently used by non-runtime PM drivers. As drivers are converted to use runtime PM, remove the annotation from the clocks. 4. Do nothing; leave the problem to those responsible for the unconverted drivers. 5. Re-enable clock-based clockdomain control in the OMAP4 clock code. This would revert back to the behavior of Linux 3.0, simply with a slightly longer module enable/disable latency. Unfortunately, no approach seemed particularly good. Options 1 through 3 seemed unwise due to the following reasons: A. The OMAP struct clks are intended primarily to describe hardware clock nodes, and the intention is that no driver-specific data should be stored there (applies to #1) B. The resulting patch would have been quite large for the -rc series (applies to #1, #2, #3) C. The patch would have been a new, yet temporary hack; and similar fixes have drawn negative comments in the recent past (see for example [3]) Option 4 is undesirable because commit 665d001338b494d6d62810aa99b4c0fa1a0884b9 ("OMAP2+: hwmod: Follow the recommended PRCM module enable sequence") has resulted in a less stable kernel; and kernel stability is more important than OMAP4 power management. Option 5 is the approach taken in this patch. This seemed to be the least intrusive approach for 3.1-rc. The approach in this patch was originally proposed by Ohad Ben-Cohen . I'm simply writing the commit message and passing it along. ... Thanks to Luciano Coelho for reporting the problem. Thanks to Ohad Ben-Cohen for tracking the problem down, generating a temporary workaround, and proposing a patch to deal with the problem. Thanks to Rajendra Nayak for proposing another patch to deal with the problem. Thanks to Felipe Balbi for comments. 1. Coelho, Luciano . _Re: Oops on ehci_hcd when booting 3.0.0-rc2 on panda_. Tue, 09 Aug 2011 14:26:08 +0300. Posted to the mailing list. Available from (among others) http://www.spinics.net/linux/lists/linux-omap/msg55213.html 2. Munegowda, Keshava . _Re: Oops on ehci_hcd when booting 3.0.0-rc2 on panda_. Thu, 11 Aug 2011 13:51:05 +0530. Posted to the mailing list. Available from (among others) http://www.spinics.net/linux/lists/linux-omap/msg55371.html 3. King, Russell . _Re: [PATCH 5/8] OMAP4: PM: TEMP: Prevent l3init from idling/force sleep_. Thu, 23 Jun 2011 16:22:49 +0100. Posted to the mailing list. Available from (among others) http://www.mail-archive.com/linux-omap@vger.kernel.org/msg51392.html Signed-off-by: Paul Walmsley Cc: Luciano Coelho Cc: Ohad Ben-Cohen Cc: Rajendra Nayak Cc: Benoît Cousson Acked-by: Santosh Shilimkar --- arch/arm/mach-omap2/clock44xx_data.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c index 2af0e3f00ce1..a3a1827837e8 100644 --- a/arch/arm/mach-omap2/clock44xx_data.c +++ b/arch/arm/mach-omap2/clock44xx_data.c @@ -3379,7 +3379,13 @@ int __init omap4xxx_clk_init(void) } clk_init(&omap2_clk_functions); - omap2_clk_disable_clkdm_control(); + + /* + * Must stay commented until all OMAP SoC drivers are + * converted to runtime PM, or drivers may start crashing + * + * omap2_clk_disable_clkdm_control(); + */ for (c = omap44xx_clks; c < omap44xx_clks + ARRAY_SIZE(omap44xx_clks); c++) -- cgit v1.2.3 From dccaf33fa37a1bc5d651baeb3bfeb6becb86597b Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Fri, 19 Aug 2011 19:13:32 -0400 Subject: ext4: flush any pending end_io requests before DIO reads w/dioread_nolock There is a race between ext4 buffer write and direct_IO read with dioread_nolock mount option enabled. The problem is that we clear PageWriteback flag during end_io time but will do uninitialized-to-initialized extent conversion later with dioread_nolock. If an O_direct read request comes in during this period, ext4 will return zero instead of the recently written data. This patch checks whether there are any pending uninitialized-to-initialized extent conversion requests before doing O_direct read to close the race. Note that this is just a bandaid fix. The fundamental issue is that we clear PageWriteback flag before we really complete an IO, which is problem-prone. To fix the fundamental issue, we may need to implement an extent tree cache that we can use to look up pending to-be-converted extents. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/indirect.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8602cde5b5a..0962642119c0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -800,12 +800,17 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, } retry: - if (rw == READ && ext4_should_dioread_nolock(inode)) + if (rw == READ && ext4_should_dioread_nolock(inode)) { + if (unlikely(!list_empty(&ei->i_completed_io_list))) { + mutex_lock(&inode->i_mutex); + ext4_flush_completed_IO(inode); + mutex_unlock(&inode->i_mutex); + } ret = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, ext4_get_block, NULL, NULL, 0); - else { + } else { ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, ext4_get_block); -- cgit v1.2.3 From b6acf013bdc6f6ff9643030add85832d44034a28 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 20 Aug 2011 09:14:45 +0200 Subject: ALSA: hda - Don't spew too many ELD errors Currently HD-audio driver shows the all error ELD byte as an error in the kernel message. This is annoying when the video driver doesn't set the correct ELD from the beginning. e.g. radeon sends a zero-byte data, but we still check ELD with the fixed 128 byte as a workaround for some broken devices, it spews 128-times errors. For avoiding this, the driver aborts reading when the first byte is invalid. In such a case, the whole data is certainly invalid. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_eld.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/sound/pci/hda/hda_eld.c b/sound/pci/hda/hda_eld.c index 28ce17d09c33..c34f730f4815 100644 --- a/sound/pci/hda/hda_eld.c +++ b/sound/pci/hda/hda_eld.c @@ -144,25 +144,17 @@ static int cea_sampling_frequencies[8] = { SNDRV_PCM_RATE_192000, /* 7: 192000Hz */ }; -static unsigned char hdmi_get_eld_byte(struct hda_codec *codec, hda_nid_t nid, +static unsigned int hdmi_get_eld_data(struct hda_codec *codec, hda_nid_t nid, int byte_index) { unsigned int val; val = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_HDMI_ELDD, byte_index); - #ifdef BE_PARANOID printk(KERN_INFO "HDMI: ELD data byte %d: 0x%x\n", byte_index, val); #endif - - if ((val & AC_ELDD_ELD_VALID) == 0) { - snd_printd(KERN_INFO "HDMI: invalid ELD data byte %d\n", - byte_index); - val = 0; - } - - return val & AC_ELDD_ELD_DATA; + return val; } #define GRAB_BITS(buf, byte, lowbit, bits) \ @@ -344,11 +336,26 @@ int snd_hdmi_get_eld(struct hdmi_eld *eld, if (!buf) return -ENOMEM; - for (i = 0; i < size; i++) - buf[i] = hdmi_get_eld_byte(codec, nid, i); + for (i = 0; i < size; i++) { + unsigned int val = hdmi_get_eld_data(codec, nid, i); + if (!(val & AC_ELDD_ELD_VALID)) { + if (!i) { + snd_printd(KERN_INFO + "HDMI: invalid ELD data\n"); + ret = -EINVAL; + goto error; + } + snd_printd(KERN_INFO + "HDMI: invalid ELD data byte %d\n", i); + val = 0; + } else + val &= AC_ELDD_ELD_DATA; + buf[i] = val; + } ret = hdmi_update_eld(eld, buf, size); +error: kfree(buf); return ret; } -- cgit v1.2.3 From 1b004d03d8670bdd871e0f297ed20bc510e404de Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Sat, 20 Aug 2011 09:19:59 +0200 Subject: ALSA: hda - Fix error check from snd_hda_get_conn_index() in patch_cirrus.c snd_hda_get_conn_index() returns a negative value while the current code stores it in an unsigned int. It must be stored in a signed integer. Reported-by: Jesper Juhl Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_cirrus.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 47d6ffc9b5b5..d6c93d92b550 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -375,7 +375,7 @@ static int is_ext_mic(struct hda_codec *codec, unsigned int idx) static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, unsigned int *idxp) { - int i; + int i, idx; hda_nid_t nid; nid = codec->start_nid; @@ -384,9 +384,11 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, type = get_wcaps_type(get_wcaps(codec, nid)); if (type != AC_WID_AUD_IN) continue; - *idxp = snd_hda_get_conn_index(codec, nid, pin, false); - if (*idxp >= 0) + idx = snd_hda_get_conn_index(codec, nid, pin, false); + if (idx >= 0) { + *idxp = idx; return nid; + } } return 0; } -- cgit v1.2.3 From de75577c8c3ab733f808c65e1a9d55882efde68e Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:41 +0200 Subject: ALSA: sound/aoa/fabrics/layout.c: remove unneeded kfree The label outnodev is only used when kzalloc has not yet taken place or has failed, so there is no need for the call for kfree under this label. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Signed-off-by: Takashi Iwai --- sound/aoa/fabrics/layout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/aoa/fabrics/layout.c b/sound/aoa/fabrics/layout.c index 3fd1a7e24928..552b97afbca5 100644 --- a/sound/aoa/fabrics/layout.c +++ b/sound/aoa/fabrics/layout.c @@ -1073,10 +1073,10 @@ static int aoa_fabric_layout_probe(struct soundbus_dev *sdev) sdev->pcmid = -1; list_del(&ldev->list); layouts_list_items--; + kfree(ldev); outnodev: of_node_put(sound); layout_device = NULL; - kfree(ldev); return -ENODEV; } -- cgit v1.2.3 From fbe5e29ec1886967255e76946aaf537b8cc9b81e Mon Sep 17 00:00:00 2001 From: Daniel Schwierzeck Date: Fri, 19 Aug 2011 12:04:20 +0000 Subject: atm: br2684: Fix oops due to skb->dev being NULL This oops have been already fixed with commit 27141666b69f535a4d63d7bc6d9e84ee5032f82a atm: [br2684] Fix oops due to skb->dev being NULL It happens that if a packet arrives in a VC between the call to open it on the hardware and the call to change the backend to br2684, br2684_regvcc processes the packet and oopses dereferencing skb->dev because it is NULL before the call to br2684_push(). but have been introduced again with commit b6211ae7f2e56837c6a4849316396d1535606e90 atm: Use SKB queue and list helpers instead of doing it by-hand. Signed-off-by: Daniel Schwierzeck Signed-off-by: David S. Miller --- net/atm/br2684.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 52cfd0c3ea71..d07223c834af 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -558,12 +558,13 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) spin_unlock_irqrestore(&rq->lock, flags); skb_queue_walk_safe(&queue, skb, tmp) { - struct net_device *dev = skb->dev; + struct net_device *dev; + + br2684_push(atmvcc, skb); + dev = skb->dev; dev->stats.rx_bytes -= skb->len; dev->stats.rx_packets--; - - br2684_push(atmvcc, skb); } /* initialize netdev carrier state */ -- cgit v1.2.3 From d70d43d7d719ab709af7df109e706e804fe21834 Mon Sep 17 00:00:00 2001 From: Jiejing Zhang Date: Sat, 20 Aug 2011 14:38:01 -0700 Subject: Input: max11801_ts - correct license statement The original license statement was confusing since it was unclear if the license was pure GPLv2 or GPLv2+ and did not match the license of the driver max11801_ts was derived from. The license is GPLv2+. Signed-off-by: Jiejing Zhang Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/max11801_ts.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/max11801_ts.c b/drivers/input/touchscreen/max11801_ts.c index 4f2713d92791..4627fe55b401 100644 --- a/drivers/input/touchscreen/max11801_ts.c +++ b/drivers/input/touchscreen/max11801_ts.c @@ -9,7 +9,8 @@ * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. */ /* -- cgit v1.2.3 From 5598473a5b40c47a8c5349dd2c2630797169cf1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 20 Aug 2011 17:14:54 -0700 Subject: sparc: Allow handling signals when stack is corrupted. If we can't push the pending register windows onto the user's stack, we disallow signal delivery even if the signal would be delivered on a valid seperate signal stack. Add a register window save area in the signal frame, and store any unsavable windows there. On sigreturn, if any windows are still queued up in the signal frame, try to push them back onto the stack and if that fails we kill the process immediately. This allows the debug/tst-longjmp_chk2 glibc test case to pass. Signed-off-by: David S. Miller --- arch/sparc/include/asm/sigcontext.h | 14 +++ arch/sparc/kernel/Makefile | 1 + arch/sparc/kernel/signal32.c | 184 ++++++++++++++++++++---------------- arch/sparc/kernel/signal_32.c | 172 ++++++++++++++++----------------- arch/sparc/kernel/signal_64.c | 108 +++++++++------------ arch/sparc/kernel/sigutil.h | 9 ++ arch/sparc/kernel/sigutil_32.c | 120 +++++++++++++++++++++++ arch/sparc/kernel/sigutil_64.c | 93 ++++++++++++++++++ 8 files changed, 468 insertions(+), 233 deletions(-) create mode 100644 arch/sparc/kernel/sigutil.h create mode 100644 arch/sparc/kernel/sigutil_32.c create mode 100644 arch/sparc/kernel/sigutil_64.c diff --git a/arch/sparc/include/asm/sigcontext.h b/arch/sparc/include/asm/sigcontext.h index a1607d180354..69914d748130 100644 --- a/arch/sparc/include/asm/sigcontext.h +++ b/arch/sparc/include/asm/sigcontext.h @@ -45,6 +45,19 @@ typedef struct { int si_mask; } __siginfo32_t; +#define __SIGC_MAXWIN 7 + +typedef struct { + unsigned long locals[8]; + unsigned long ins[8]; +} __siginfo_reg_window; + +typedef struct { + int wsaved; + __siginfo_reg_window reg_window[__SIGC_MAXWIN]; + unsigned long rwbuf_stkptrs[__SIGC_MAXWIN]; +} __siginfo_rwin_t; + #ifdef CONFIG_SPARC64 typedef struct { unsigned int si_float_regs [64]; @@ -73,6 +86,7 @@ struct sigcontext { unsigned long ss_size; } sigc_stack; unsigned long sigc_mask; + __siginfo_rwin_t * sigc_rwin_save; }; #else diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index b90b4a1d070a..cb85458f89d2 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -32,6 +32,7 @@ obj-$(CONFIG_SPARC32) += sun4m_irq.o sun4c_irq.o sun4d_irq.o obj-y += process_$(BITS).o obj-y += signal_$(BITS).o +obj-y += sigutil_$(BITS).o obj-$(CONFIG_SPARC32) += ioport.o obj-y += setup_$(BITS).o obj-y += idprom.o diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 75fad425e249..1ba95aff5d59 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -29,6 +29,8 @@ #include #include +#include "sigutil.h" + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) /* This magic should be in g_upper[0] for all upper parts @@ -44,14 +46,14 @@ typedef struct { struct signal_frame32 { struct sparc_stackf32 ss; __siginfo32_t info; - /* __siginfo_fpu32_t * */ u32 fpu_save; + /* __siginfo_fpu_t * */ u32 fpu_save; unsigned int insns[2]; unsigned int extramask[_COMPAT_NSIG_WORDS - 1]; unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ /* Only valid if (info.si_regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ siginfo_extra_v8plus_t v8plus; - __siginfo_fpu_t fpu_state; -}; + /* __siginfo_rwin_t * */u32 rwin_save; +} __attribute__((aligned(8))); typedef struct compat_siginfo{ int si_signo; @@ -110,18 +112,14 @@ struct rt_signal_frame32 { compat_siginfo_t info; struct pt_regs32 regs; compat_sigset_t mask; - /* __siginfo_fpu32_t * */ u32 fpu_save; + /* __siginfo_fpu_t * */ u32 fpu_save; unsigned int insns[2]; stack_t32 stack; unsigned int extra_size; /* Should be sizeof(siginfo_extra_v8plus_t) */ /* Only valid if (regs.psr & (PSR_VERS|PSR_IMPL)) == PSR_V8PLUS */ siginfo_extra_v8plus_t v8plus; - __siginfo_fpu_t fpu_state; -}; - -/* Align macros */ -#define SF_ALIGNEDSZ (((sizeof(struct signal_frame32) + 15) & (~15))) -#define RT_ALIGNEDSZ (((sizeof(struct rt_signal_frame32) + 15) & (~15))) + /* __siginfo_rwin_t * */u32 rwin_save; +} __attribute__((aligned(8))); int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { @@ -192,30 +190,13 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) return 0; } -static int restore_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err; - - err = __get_user(fprs, &fpu->si_fprs); - fprs_write(0); - regs->tstate &= ~TSTATE_PEF; - if (fprs & FPRS_DL) - err |= copy_from_user(fpregs, &fpu->si_float_regs[0], (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], (sizeof(unsigned int) * 32)); - err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); - current_thread_info()->fpsaved[0] |= fprs; - return err; -} - void do_sigreturn32(struct pt_regs *regs) { struct signal_frame32 __user *sf; + compat_uptr_t fpu_save; + compat_uptr_t rwin_save; unsigned int psr; - unsigned pc, npc, fpu_save; + unsigned pc, npc; sigset_t set; unsigned seta[_COMPAT_NSIG_WORDS]; int err, i; @@ -273,8 +254,13 @@ void do_sigreturn32(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state32(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, compat_ptr(fpu_save)); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(compat_ptr(rwin_save))) + goto segv; + } err |= __get_user(seta[0], &sf->info.si_mask); err |= copy_from_user(seta+1, &sf->extramask, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); @@ -300,7 +286,9 @@ segv: asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) { struct rt_signal_frame32 __user *sf; - unsigned int psr, pc, npc, fpu_save, u_ss_sp; + unsigned int psr, pc, npc, u_ss_sp; + compat_uptr_t fpu_save; + compat_uptr_t rwin_save; mm_segment_t old_fs; sigset_t set; compat_sigset_t seta; @@ -359,8 +347,8 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state32(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, compat_ptr(fpu_save)); err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t)); err |= __get_user(u_ss_sp, &sf->stack.ss_sp); st.ss_sp = compat_ptr(u_ss_sp); @@ -376,6 +364,12 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) do_sigaltstack((stack_t __user *) &st, NULL, (unsigned long)sf); set_fs(old_fs); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(compat_ptr(rwin_save))) + goto segv; + } + switch (_NSIG_WORDS) { case 4: set.sig[3] = seta.sig[6] + (((long)seta.sig[7]) << 32); case 3: set.sig[2] = seta.sig[4] + (((long)seta.sig[5]) << 32); @@ -433,26 +427,6 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns return (void __user *) sp; } -static int save_fpu_state32(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err = 0; - - fprs = current_thread_info()->fpsaved[0]; - if (fprs & FPRS_DL) - err |= copy_to_user(&fpu->si_float_regs[0], fpregs, - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, - (sizeof(unsigned int) * 32)); - err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); - err |= __put_user(fprs, &fpu->si_fprs); - - return err; -} - /* The I-cache flush instruction only works in the primary ASI, which * right now is the nucleus, aka. kernel space. * @@ -515,18 +489,23 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset) { struct signal_frame32 __user *sf; + int i, err, wsaved; + void __user *tail; int sigframe_size; u32 psr; - int i, err; unsigned int seta[_COMPAT_NSIG_WORDS]; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = SF_ALIGNEDSZ; - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + + sigframe_size = sizeof(*sf); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct signal_frame32 __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -534,8 +513,7 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (get_thread_wsaved() != 0) - goto sigill; + tail = (sf + 1); /* 2. Save the current process state */ if (test_thread_flag(TIF_32BIT)) { @@ -560,11 +538,22 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, &sf->v8plus.asi); if (psr & PSR_EF) { - err |= save_fpu_state32(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user((u64)fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user((u64)rwp, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } switch (_NSIG_WORDS) { case 4: seta[7] = (oldset->sig[3] >> 32); @@ -580,10 +569,21 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, err |= __copy_to_user(sf->extramask, seta + 1, (_COMPAT_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); - + if (!wsaved) { + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + } else { + struct reg_window *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + for (i = 0; i < 8; i++) + err |= __put_user(rp->locals[i], &sf->ss.locals[i]); + for (i = 0; i < 6; i++) + err |= __put_user(rp->ins[i], &sf->ss.ins[i]); + err |= __put_user(rp->ins[6], &sf->ss.fp); + err |= __put_user(rp->ins[7], &sf->ss.callers_pc); + } if (err) goto sigsegv; @@ -613,7 +613,6 @@ static int setup_frame32(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0x91d02010, &sf->insns[1]); /*t 0x10*/ if (err) goto sigsegv; - flush_signal_insns(address); } return 0; @@ -632,18 +631,23 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, siginfo_t *info) { struct rt_signal_frame32 __user *sf; + int i, err, wsaved; + void __user *tail; int sigframe_size; u32 psr; - int i, err; compat_sigset_t seta; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = RT_ALIGNEDSZ; - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + + sigframe_size = sizeof(*sf); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame32 __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -651,8 +655,7 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (get_thread_wsaved() != 0) - goto sigill; + tail = (sf + 1); /* 2. Save the current process state */ if (test_thread_flag(TIF_32BIT)) { @@ -677,11 +680,22 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, &sf->v8plus.asi); if (psr & PSR_EF) { - err |= save_fpu_state32(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user((u64)fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user((u64)rwp, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } /* Update the siginfo structure. */ err |= copy_siginfo_to_user32(&sf->info, info); @@ -703,9 +717,21 @@ static int setup_rt_frame32(struct k_sigaction *ka, struct pt_regs *regs, } err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t)); - err |= copy_in_user((u32 __user *)sf, - (u32 __user *)(regs->u_regs[UREG_FP]), - sizeof(struct reg_window32)); + if (!wsaved) { + err |= copy_in_user((u32 __user *)sf, + (u32 __user *)(regs->u_regs[UREG_FP]), + sizeof(struct reg_window32)); + } else { + struct reg_window *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + for (i = 0; i < 8; i++) + err |= __put_user(rp->locals[i], &sf->ss.locals[i]); + for (i = 0; i < 6; i++) + err |= __put_user(rp->ins[i], &sf->ss.ins[i]); + err |= __put_user(rp->ins[6], &sf->ss.fp); + err |= __put_user(rp->ins[7], &sf->ss.callers_pc); + } if (err) goto sigsegv; diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c index 5e5c5fd03783..04ede8f04add 100644 --- a/arch/sparc/kernel/signal_32.c +++ b/arch/sparc/kernel/signal_32.c @@ -26,6 +26,8 @@ #include #include /* flush_sig_insns */ +#include "sigutil.h" + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) extern void fpsave(unsigned long *fpregs, unsigned long *fsr, @@ -39,8 +41,8 @@ struct signal_frame { unsigned long insns[2] __attribute__ ((aligned (8))); unsigned int extramask[_NSIG_WORDS - 1]; unsigned int extra_size; /* Should be 0 */ - __siginfo_fpu_t fpu_state; -}; + __siginfo_rwin_t __user *rwin_save; +} __attribute__((aligned(8))); struct rt_signal_frame { struct sparc_stackf ss; @@ -51,8 +53,8 @@ struct rt_signal_frame { unsigned int insns[2]; stack_t stack; unsigned int extra_size; /* Should be 0 */ - __siginfo_fpu_t fpu_state; -}; + __siginfo_rwin_t __user *rwin_save; +} __attribute__((aligned(8))); /* Align macros */ #define SF_ALIGNEDSZ (((sizeof(struct signal_frame) + 7) & (~7))) @@ -79,43 +81,13 @@ asmlinkage int sys_sigsuspend(old_sigset_t set) return _sigpause_common(set); } -static inline int -restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - int err; -#ifdef CONFIG_SMP - if (test_tsk_thread_flag(current, TIF_USEDFPU)) - regs->psr &= ~PSR_EF; -#else - if (current == last_task_used_math) { - last_task_used_math = NULL; - regs->psr &= ~PSR_EF; - } -#endif - set_used_math(); - clear_tsk_thread_flag(current, TIF_USEDFPU); - - if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu))) - return -EFAULT; - - err = __copy_from_user(¤t->thread.float_regs[0], &fpu->si_float_regs[0], - (sizeof(unsigned long) * 32)); - err |= __get_user(current->thread.fsr, &fpu->si_fsr); - err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth); - if (current->thread.fpqdepth != 0) - err |= __copy_from_user(¤t->thread.fpqueue[0], - &fpu->si_fpqueue[0], - ((sizeof(unsigned long) + - (sizeof(unsigned long *)))*16)); - return err; -} - asmlinkage void do_sigreturn(struct pt_regs *regs) { struct signal_frame __user *sf; unsigned long up_psr, pc, npc; sigset_t set; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; int err; /* Always make any pending restarted system calls return -EINTR */ @@ -150,9 +122,11 @@ asmlinkage void do_sigreturn(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) err |= restore_fpu_state(regs, fpu_save); + err |= __get_user(rwin_save, &sf->rwin_save); + if (rwin_save) + err |= restore_rwin_state(rwin_save); /* This is pretty much atomic, no amount locking would prevent * the races which exist anyways. @@ -180,6 +154,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) struct rt_signal_frame __user *sf; unsigned int psr, pc, npc; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; mm_segment_t old_fs; sigset_t set; stack_t st; @@ -207,8 +182,7 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) pt_regs_clear_syscall(regs); err |= __get_user(fpu_save, &sf->fpu_save); - - if (fpu_save) + if (!err && fpu_save) err |= restore_fpu_state(regs, fpu_save); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); @@ -228,6 +202,12 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs) do_sigaltstack((const stack_t __user *) &st, NULL, (unsigned long)sf); set_fs(old_fs); + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(rwin_save)) + goto segv; + } + sigdelsetmask(&set, ~_BLOCKABLE); spin_lock_irq(¤t->sighand->siglock); current->blocked = set; @@ -280,53 +260,23 @@ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *re return (void __user *) sp; } -static inline int -save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - int err = 0; -#ifdef CONFIG_SMP - if (test_tsk_thread_flag(current, TIF_USEDFPU)) { - put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); - regs->psr &= ~(PSR_EF); - clear_tsk_thread_flag(current, TIF_USEDFPU); - } -#else - if (current == last_task_used_math) { - put_psr(get_psr() | PSR_EF); - fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, - ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); - last_task_used_math = NULL; - regs->psr &= ~(PSR_EF); - } -#endif - err |= __copy_to_user(&fpu->si_float_regs[0], - ¤t->thread.float_regs[0], - (sizeof(unsigned long) * 32)); - err |= __put_user(current->thread.fsr, &fpu->si_fsr); - err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth); - if (current->thread.fpqdepth != 0) - err |= __copy_to_user(&fpu->si_fpqueue[0], - ¤t->thread.fpqueue[0], - ((sizeof(unsigned long) + - (sizeof(unsigned long *)))*16)); - clear_used_math(); - return err; -} - static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset) { struct signal_frame __user *sf; - int sigframe_size, err; + int sigframe_size, err, wsaved; + void __user *tail; /* 1. Make sure everything is clean */ synchronize_user_stack(); - sigframe_size = SF_ALIGNEDSZ; - if (!used_math()) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = current_thread_info()->w_saved; + + sigframe_size = sizeof(*sf); + if (used_math()) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct signal_frame __user *) get_sigframe(&ka->sa, regs, sigframe_size); @@ -334,8 +284,7 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, if (invalid_frame_pointer(sf, sigframe_size)) goto sigill_and_return; - if (current_thread_info()->w_saved != 0) - goto sigill_and_return; + tail = sf + 1; /* 2. Save the current process state */ err = __copy_to_user(&sf->info.si_regs, regs, sizeof(struct pt_regs)); @@ -343,17 +292,34 @@ static int setup_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0, &sf->extra_size); if (used_math()) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user(&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user(fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user(rwp, &sf->rwin_save); + } else { + err |= __put_user(0, &sf->rwin_save); + } err |= __put_user(oldset->sig[0], &sf->info.si_mask); err |= __copy_to_user(sf->extramask, &oldset->sig[1], (_NSIG_WORDS - 1) * sizeof(unsigned int)); - err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window32)); + if (!wsaved) { + err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], + sizeof(struct reg_window32)); + } else { + struct reg_window32 *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= __copy_to_user(sf, rp, sizeof(struct reg_window32)); + } if (err) goto sigsegv; @@ -399,21 +365,24 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; - int sigframe_size; + int sigframe_size, wsaved; + void __user *tail; unsigned int psr; int err; synchronize_user_stack(); - sigframe_size = RT_ALIGNEDSZ; - if (!used_math()) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = current_thread_info()->w_saved; + sigframe_size = sizeof(*sf); + if (used_math()) + sigframe_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sigframe_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame __user *) get_sigframe(&ka->sa, regs, sigframe_size); if (invalid_frame_pointer(sf, sigframe_size)) goto sigill; - if (current_thread_info()->w_saved != 0) - goto sigill; + tail = sf + 1; err = __put_user(regs->pc, &sf->regs.pc); err |= __put_user(regs->npc, &sf->regs.npc); err |= __put_user(regs->y, &sf->regs.y); @@ -425,11 +394,21 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(0, &sf->extra_size); if (psr & PSR_EF) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user(&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t *fp = tail; + tail += sizeof(*fp); + err |= save_fpu_state(regs, fp); + err |= __put_user(fp, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t *rwp = tail; + tail += sizeof(*rwp); + err |= save_rwin_state(wsaved, rwp); + err |= __put_user(rwp, &sf->rwin_save); + } else { + err |= __put_user(0, &sf->rwin_save); + } err |= __copy_to_user(&sf->mask, &oldset->sig[0], sizeof(sigset_t)); /* Setup sigaltstack */ @@ -437,8 +416,15 @@ static int setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= __put_user(sas_ss_flags(regs->u_regs[UREG_FP]), &sf->stack.ss_flags); err |= __put_user(current->sas_ss_size, &sf->stack.ss_size); - err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], - sizeof(struct reg_window32)); + if (!wsaved) { + err |= __copy_to_user(sf, (char *) regs->u_regs[UREG_FP], + sizeof(struct reg_window32)); + } else { + struct reg_window32 *rp; + + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= __copy_to_user(sf, rp, sizeof(struct reg_window32)); + } err |= copy_siginfo_to_user(&sf->info, info); diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c index 006fe4515886..47509df3b893 100644 --- a/arch/sparc/kernel/signal_64.c +++ b/arch/sparc/kernel/signal_64.c @@ -34,6 +34,7 @@ #include "entry.h" #include "systbls.h" +#include "sigutil.h" #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) @@ -236,7 +237,7 @@ struct rt_signal_frame { __siginfo_fpu_t __user *fpu_save; stack_t stack; sigset_t mask; - __siginfo_fpu_t fpu_state; + __siginfo_rwin_t *rwin_save; }; static long _sigpause_common(old_sigset_t set) @@ -266,33 +267,12 @@ asmlinkage long sys_sigsuspend(old_sigset_t set) return _sigpause_common(set); } -static inline int -restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err; - - err = __get_user(fprs, &fpu->si_fprs); - fprs_write(0); - regs->tstate &= ~TSTATE_PEF; - if (fprs & FPRS_DL) - err |= copy_from_user(fpregs, &fpu->si_float_regs[0], - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], - (sizeof(unsigned int) * 32)); - err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); - current_thread_info()->fpsaved[0] |= fprs; - return err; -} - void do_rt_sigreturn(struct pt_regs *regs) { struct rt_signal_frame __user *sf; unsigned long tpc, tnpc, tstate; __siginfo_fpu_t __user *fpu_save; + __siginfo_rwin_t __user *rwin_save; sigset_t set; int err; @@ -325,8 +305,8 @@ void do_rt_sigreturn(struct pt_regs *regs) regs->tstate |= (tstate & (TSTATE_ASI | TSTATE_ICC | TSTATE_XCC)); err |= __get_user(fpu_save, &sf->fpu_save); - if (fpu_save) - err |= restore_fpu_state(regs, &sf->fpu_state); + if (!err && fpu_save) + err |= restore_fpu_state(regs, fpu_save); err |= __copy_from_user(&set, &sf->mask, sizeof(sigset_t)); err |= do_sigaltstack(&sf->stack, NULL, (unsigned long)sf); @@ -334,6 +314,12 @@ void do_rt_sigreturn(struct pt_regs *regs) if (err) goto segv; + err |= __get_user(rwin_save, &sf->rwin_save); + if (!err && rwin_save) { + if (restore_rwin_state(rwin_save)) + goto segv; + } + regs->tpc = tpc; regs->tnpc = tnpc; @@ -351,34 +337,13 @@ segv: } /* Checks if the fp is valid */ -static int invalid_frame_pointer(void __user *fp, int fplen) +static int invalid_frame_pointer(void __user *fp) { if (((unsigned long) fp) & 15) return 1; return 0; } -static inline int -save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) -{ - unsigned long *fpregs = current_thread_info()->fpregs; - unsigned long fprs; - int err = 0; - - fprs = current_thread_info()->fpsaved[0]; - if (fprs & FPRS_DL) - err |= copy_to_user(&fpu->si_float_regs[0], fpregs, - (sizeof(unsigned int) * 32)); - if (fprs & FPRS_DU) - err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, - (sizeof(unsigned int) * 32)); - err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); - err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); - err |= __put_user(fprs, &fpu->si_fprs); - - return err; -} - static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize) { unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS; @@ -414,34 +379,48 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, int signo, sigset_t *oldset, siginfo_t *info) { struct rt_signal_frame __user *sf; - int sigframe_size, err; + int wsaved, err, sf_size; + void __user *tail; /* 1. Make sure everything is clean */ synchronize_user_stack(); save_and_clear_fpu(); - sigframe_size = sizeof(struct rt_signal_frame); - if (!(current_thread_info()->fpsaved[0] & FPRS_FEF)) - sigframe_size -= sizeof(__siginfo_fpu_t); + wsaved = get_thread_wsaved(); + sf_size = sizeof(struct rt_signal_frame); + if (current_thread_info()->fpsaved[0] & FPRS_FEF) + sf_size += sizeof(__siginfo_fpu_t); + if (wsaved) + sf_size += sizeof(__siginfo_rwin_t); sf = (struct rt_signal_frame __user *) - get_sigframe(ka, regs, sigframe_size); - - if (invalid_frame_pointer (sf, sigframe_size)) - goto sigill; + get_sigframe(ka, regs, sf_size); - if (get_thread_wsaved() != 0) + if (invalid_frame_pointer (sf)) goto sigill; + tail = (sf + 1); + /* 2. Save the current process state */ err = copy_to_user(&sf->regs, regs, sizeof (*regs)); if (current_thread_info()->fpsaved[0] & FPRS_FEF) { - err |= save_fpu_state(regs, &sf->fpu_state); - err |= __put_user((u64)&sf->fpu_state, &sf->fpu_save); + __siginfo_fpu_t __user *fpu_save = tail; + tail += sizeof(__siginfo_fpu_t); + err |= save_fpu_state(regs, fpu_save); + err |= __put_user((u64)fpu_save, &sf->fpu_save); } else { err |= __put_user(0, &sf->fpu_save); } + if (wsaved) { + __siginfo_rwin_t __user *rwin_save = tail; + tail += sizeof(__siginfo_rwin_t); + err |= save_rwin_state(wsaved, rwin_save); + err |= __put_user((u64)rwin_save, &sf->rwin_save); + set_thread_wsaved(0); + } else { + err |= __put_user(0, &sf->rwin_save); + } /* Setup sigaltstack */ err |= __put_user(current->sas_ss_sp, &sf->stack.ss_sp); @@ -450,10 +429,17 @@ setup_rt_frame(struct k_sigaction *ka, struct pt_regs *regs, err |= copy_to_user(&sf->mask, oldset, sizeof(sigset_t)); - err |= copy_in_user((u64 __user *)sf, - (u64 __user *)(regs->u_regs[UREG_FP]+STACK_BIAS), - sizeof(struct reg_window)); + if (!wsaved) { + err |= copy_in_user((u64 __user *)sf, + (u64 __user *)(regs->u_regs[UREG_FP] + + STACK_BIAS), + sizeof(struct reg_window)); + } else { + struct reg_window *rp; + rp = ¤t_thread_info()->reg_window[wsaved - 1]; + err |= copy_to_user(sf, rp, sizeof(struct reg_window)); + } if (info) err |= copy_siginfo_to_user(&sf->info, info); else { diff --git a/arch/sparc/kernel/sigutil.h b/arch/sparc/kernel/sigutil.h new file mode 100644 index 000000000000..d223aa432bb6 --- /dev/null +++ b/arch/sparc/kernel/sigutil.h @@ -0,0 +1,9 @@ +#ifndef _SIGUTIL_H +#define _SIGUTIL_H + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu); +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu); +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin); +int restore_rwin_state(__siginfo_rwin_t __user *rp); + +#endif /* _SIGUTIL_H */ diff --git a/arch/sparc/kernel/sigutil_32.c b/arch/sparc/kernel/sigutil_32.c new file mode 100644 index 000000000000..35c7897b009a --- /dev/null +++ b/arch/sparc/kernel/sigutil_32.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "sigutil.h" + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + int err = 0; +#ifdef CONFIG_SMP + if (test_tsk_thread_flag(current, TIF_USEDFPU)) { + put_psr(get_psr() | PSR_EF); + fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, + ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + regs->psr &= ~(PSR_EF); + clear_tsk_thread_flag(current, TIF_USEDFPU); + } +#else + if (current == last_task_used_math) { + put_psr(get_psr() | PSR_EF); + fpsave(¤t->thread.float_regs[0], ¤t->thread.fsr, + ¤t->thread.fpqueue[0], ¤t->thread.fpqdepth); + last_task_used_math = NULL; + regs->psr &= ~(PSR_EF); + } +#endif + err |= __copy_to_user(&fpu->si_float_regs[0], + ¤t->thread.float_regs[0], + (sizeof(unsigned long) * 32)); + err |= __put_user(current->thread.fsr, &fpu->si_fsr); + err |= __put_user(current->thread.fpqdepth, &fpu->si_fpqdepth); + if (current->thread.fpqdepth != 0) + err |= __copy_to_user(&fpu->si_fpqueue[0], + ¤t->thread.fpqueue[0], + ((sizeof(unsigned long) + + (sizeof(unsigned long *)))*16)); + clear_used_math(); + return err; +} + +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + int err; +#ifdef CONFIG_SMP + if (test_tsk_thread_flag(current, TIF_USEDFPU)) + regs->psr &= ~PSR_EF; +#else + if (current == last_task_used_math) { + last_task_used_math = NULL; + regs->psr &= ~PSR_EF; + } +#endif + set_used_math(); + clear_tsk_thread_flag(current, TIF_USEDFPU); + + if (!access_ok(VERIFY_READ, fpu, sizeof(*fpu))) + return -EFAULT; + + err = __copy_from_user(¤t->thread.float_regs[0], &fpu->si_float_regs[0], + (sizeof(unsigned long) * 32)); + err |= __get_user(current->thread.fsr, &fpu->si_fsr); + err |= __get_user(current->thread.fpqdepth, &fpu->si_fpqdepth); + if (current->thread.fpqdepth != 0) + err |= __copy_from_user(¤t->thread.fpqueue[0], + &fpu->si_fpqueue[0], + ((sizeof(unsigned long) + + (sizeof(unsigned long *)))*16)); + return err; +} + +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin) +{ + int i, err = __put_user(wsaved, &rwin->wsaved); + + for (i = 0; i < wsaved; i++) { + struct reg_window32 *rp; + unsigned long fp; + + rp = ¤t_thread_info()->reg_window[i]; + fp = current_thread_info()->rwbuf_stkptrs[i]; + err |= copy_to_user(&rwin->reg_window[i], rp, + sizeof(struct reg_window32)); + err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]); + } + return err; +} + +int restore_rwin_state(__siginfo_rwin_t __user *rp) +{ + struct thread_info *t = current_thread_info(); + int i, wsaved, err; + + __get_user(wsaved, &rp->wsaved); + if (wsaved > NSWINS) + return -EFAULT; + + err = 0; + for (i = 0; i < wsaved; i++) { + err |= copy_from_user(&t->reg_window[i], + &rp->reg_window[i], + sizeof(struct reg_window32)); + err |= __get_user(t->rwbuf_stkptrs[i], + &rp->rwbuf_stkptrs[i]); + } + if (err) + return err; + + t->w_saved = wsaved; + synchronize_user_stack(); + if (t->w_saved) + return -EFAULT; + return 0; + +} diff --git a/arch/sparc/kernel/sigutil_64.c b/arch/sparc/kernel/sigutil_64.c new file mode 100644 index 000000000000..e7dc508c38eb --- /dev/null +++ b/arch/sparc/kernel/sigutil_64.c @@ -0,0 +1,93 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include "sigutil.h" + +int save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err = 0; + + fprs = current_thread_info()->fpsaved[0]; + if (fprs & FPRS_DL) + err |= copy_to_user(&fpu->si_float_regs[0], fpregs, + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_to_user(&fpu->si_float_regs[32], fpregs+16, + (sizeof(unsigned int) * 32)); + err |= __put_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __put_user(current_thread_info()->gsr[0], &fpu->si_gsr); + err |= __put_user(fprs, &fpu->si_fprs); + + return err; +} + +int restore_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu) +{ + unsigned long *fpregs = current_thread_info()->fpregs; + unsigned long fprs; + int err; + + err = __get_user(fprs, &fpu->si_fprs); + fprs_write(0); + regs->tstate &= ~TSTATE_PEF; + if (fprs & FPRS_DL) + err |= copy_from_user(fpregs, &fpu->si_float_regs[0], + (sizeof(unsigned int) * 32)); + if (fprs & FPRS_DU) + err |= copy_from_user(fpregs+16, &fpu->si_float_regs[32], + (sizeof(unsigned int) * 32)); + err |= __get_user(current_thread_info()->xfsr[0], &fpu->si_fsr); + err |= __get_user(current_thread_info()->gsr[0], &fpu->si_gsr); + current_thread_info()->fpsaved[0] |= fprs; + return err; +} + +int save_rwin_state(int wsaved, __siginfo_rwin_t __user *rwin) +{ + int i, err = __put_user(wsaved, &rwin->wsaved); + + for (i = 0; i < wsaved; i++) { + struct reg_window *rp = ¤t_thread_info()->reg_window[i]; + unsigned long fp = current_thread_info()->rwbuf_stkptrs[i]; + + err |= copy_to_user(&rwin->reg_window[i], rp, + sizeof(struct reg_window)); + err |= __put_user(fp, &rwin->rwbuf_stkptrs[i]); + } + return err; +} + +int restore_rwin_state(__siginfo_rwin_t __user *rp) +{ + struct thread_info *t = current_thread_info(); + int i, wsaved, err; + + __get_user(wsaved, &rp->wsaved); + if (wsaved > NSWINS) + return -EFAULT; + + err = 0; + for (i = 0; i < wsaved; i++) { + err |= copy_from_user(&t->reg_window[i], + &rp->reg_window[i], + sizeof(struct reg_window)); + err |= __get_user(t->rwbuf_stkptrs[i], + &rp->rwbuf_stkptrs[i]); + } + if (err) + return err; + + set_thread_wsaved(wsaved); + synchronize_user_stack(); + if (get_thread_wsaved()) + return -EFAULT; + return 0; +} -- cgit v1.2.3 From 47c08f3107270e5a439bc0106a308f7c48c9621d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 20 Aug 2011 11:49:43 -0700 Subject: pci: fix new kernel-doc warning in pci.c Fix new kernel-doc warning in pci.c: Warning(drivers/pci/pci.c:3259): No description found for parameter 'mps' Warning(drivers/pci/pci.c:3259): Excess function parameter 'rq' description in 'pcie_set_mps' Signed-off-by: Randy Dunlap Cc: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 466fad6e6ee2..0ce67423a0a3 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3250,7 +3250,7 @@ int pcie_get_mps(struct pci_dev *dev) /** * pcie_set_mps - set PCI Express maximum payload size * @dev: PCI device to query - * @rq: maximum payload size in bytes + * @mps: maximum payload size in bytes * valid values are 128, 256, 512, 1024, 2048, 4096 * * If possible sets maximum payload size -- cgit v1.2.3 From 450a37d2eca6ddf6ea8186f57a7531318df6e796 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Sun, 21 Aug 2011 00:28:56 -0600 Subject: OMAP4: clock: fix compile warning Fix the following compile warning: arch/arm/mach-omap2/clock44xx_data.c: In function 'omap4xxx_clk_init': arch/arm/mach-omap2/clock44xx_data.c:3371:6: warning: 'cpu_clkflg' may be used uninitialized in this function The approach taken here is intended to work if omap4xxx_clk_init() is converted into an initcall. Thanks to Bjarne Steinsbo for proposing another approach. Signed-off-by: Paul Walmsley Cc: Bjarne Steinsbo --- arch/arm/mach-omap2/clock44xx_data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/clock44xx_data.c b/arch/arm/mach-omap2/clock44xx_data.c index 2af0e3f00ce1..4304768da712 100644 --- a/arch/arm/mach-omap2/clock44xx_data.c +++ b/arch/arm/mach-omap2/clock44xx_data.c @@ -3376,6 +3376,8 @@ int __init omap4xxx_clk_init(void) } else if (cpu_is_omap446x()) { cpu_mask = RATE_IN_4460; cpu_clkflg = CK_446X; + } else { + return 0; } clk_init(&omap2_clk_functions); -- cgit v1.2.3 From 6719db6a23d4b7f1e5052eedae394135e3aef9c1 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sat, 20 Aug 2011 08:29:51 -0400 Subject: Btrfs: fix 64 bit divide problem This fixes a regression introduced by commit cdcb725c05fe ("Btrfs: check if there is enough space for balancing smarter"). We can't do 64-bit divides on 32-bit architectures. In cases where we need to divide/multiply by 2 we should just left/right shift respectively, and in cases where theres N number of devices use do_div. Also make the counters u64 to match up with rw_devices. Thanks, Signed-off-by: Josef Bacik Acked-and-tested-by: Ingo Molnar Signed-off-by: Linus Torvalds --- fs/btrfs/extent-tree.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 80d6148f60ac..f5be06a2462f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6735,9 +6735,9 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; struct btrfs_device *device; u64 min_free; + u64 dev_min = 1; + u64 dev_nr = 0; int index; - int dev_nr = 0; - int dev_min = 1; int full = 0; int ret = 0; @@ -6796,14 +6796,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) index = get_block_group_index(block_group); if (index == 0) { dev_min = 4; - min_free /= 2; + /* Divide by 2 */ + min_free >>= 1; } else if (index == 1) { dev_min = 2; } else if (index == 2) { - min_free *= 2; + /* Multiply by 2 */ + min_free <<= 1; } else if (index == 3) { dev_min = fs_devices->rw_devices; - min_free /= dev_min; + do_div(min_free, dev_min); } mutex_lock(&root->fs_info->chunk_mutex); -- cgit v1.2.3 From 2782a35132339574b06ce30556eb9f97eb1d26cd Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 21 Aug 2011 12:48:04 -0700 Subject: Input: tnetv107x-ts - add missing include of linux/module.h tnetv107x-ts.c uses interfaces from linux/module.h, so it should include that file. This patch fixes build errors. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/tnetv107x-ts.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c index 089b0a0f3d8c..0e8f63e5b36f 100644 --- a/drivers/input/touchscreen/tnetv107x-ts.c +++ b/drivers/input/touchscreen/tnetv107x-ts.c @@ -13,6 +13,7 @@ * GNU General Public License for more details. */ +#include #include #include #include -- cgit v1.2.3 From b9cc510b395543cb7dba89c76421d23ed9e85f95 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 21 Aug 2011 12:48:08 -0700 Subject: Input: ep93xx_keypad - add missing include of linux/module.h ep93xx_keypad.c uses interfaces from linux/module.h, so it should include that file. This patch fixes build errors. Signed-off-by: Axel Lin Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/ep93xx_keypad.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/keyboard/ep93xx_keypad.c b/drivers/input/keyboard/ep93xx_keypad.c index c8242dd190d0..aa17e024d803 100644 --- a/drivers/input/keyboard/ep93xx_keypad.c +++ b/drivers/input/keyboard/ep93xx_keypad.c @@ -20,6 +20,7 @@ * flag. */ +#include #include #include #include -- cgit v1.2.3 From ffb57c4b8612c31204b06713770f6df4b8a94e4f Mon Sep 17 00:00:00 2001 From: Jay Estabrook Date: Wed, 6 Jul 2011 23:57:13 +0000 Subject: drm/radeon/alpha: Add Alpha support to Radeon DRM code Alpha needs to have available the system bus address for the Radeon's local memory, so that it can be used in ttm_bo_vm_fault(), when building the PTEs for accessing that VRAM. So, we make bus.addr hold the ioremap() return, and then we can modify bus.base appropriately for use during page fault processing. Signed-off-by: Jay Estabrook Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_ttm.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 60125ddba1e9..9b86fb0e4122 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -450,6 +450,29 @@ static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ return -EINVAL; mem->bus.base = rdev->mc.aper_base; mem->bus.is_iomem = true; +#ifdef __alpha__ + /* + * Alpha: use bus.addr to hold the ioremap() return, + * so we can modify bus.base below. + */ + if (mem->placement & TTM_PL_FLAG_WC) + mem->bus.addr = + ioremap_wc(mem->bus.base + mem->bus.offset, + mem->bus.size); + else + mem->bus.addr = + ioremap_nocache(mem->bus.base + mem->bus.offset, + mem->bus.size); + + /* + * Alpha: Use just the bus offset plus + * the hose/domain memory base for bus.base. + * It then can be used to build PTEs for VRAM + * access, as done in ttm_bo_vm_fault(). + */ + mem->bus.base = (mem->bus.base & 0x0ffffffffUL) + + rdev->ddev->hose->dense_mem_base; +#endif break; default: return -EINVAL; -- cgit v1.2.3 From 24cae9e7c9537fd6a16bc2f5ec398ee4bef5d007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:16 +0000 Subject: drm/radeon: Take IH ring into account for test size calculation. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_test.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index dee4a0c1b4b2..1ebd0fe9c13e 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -40,10 +40,14 @@ void radeon_test_moves(struct radeon_device *rdev) size = 1024 * 1024; /* Number of tests = - * (Total GTT - IB pool - writeback page - ring buffer) / test size + * (Total GTT - IB pool - writeback page - ring buffers) / test size */ - n = ((u32)(rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - RADEON_GPU_PAGE_SIZE - - rdev->cp.ring_size)) / size; + n = rdev->mc.gtt_size - RADEON_IB_POOL_SIZE*64*1024 - rdev->cp.ring_size; + if (rdev->wb.wb_obj) + n -= RADEON_GPU_PAGE_SIZE; + if (rdev->ih.ring_obj) + n -= rdev->ih.ring_size; + n /= size; gtt_obj = kzalloc(n * sizeof(*gtt_obj), GFP_KERNEL); if (!gtt_obj) { -- cgit v1.2.3 From 4fb1a35c0185f8fa3e71b12de62b8752a9a9ed0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:17 +0000 Subject: drm/radeon: Explicitly print GTT/VRAM offsets on test failure. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise these would need to be painstakingly calculated looking at the source code. Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_test.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_test.c b/drivers/gpu/drm/radeon/radeon_test.c index 1ebd0fe9c13e..602fa3541c45 100644 --- a/drivers/gpu/drm/radeon/radeon_test.c +++ b/drivers/gpu/drm/radeon/radeon_test.c @@ -136,9 +136,15 @@ void radeon_test_moves(struct radeon_device *rdev) gtt_start++, vram_start++) { if (*vram_start != gtt_start) { DRM_ERROR("Incorrect GTT->VRAM copy %d: Got 0x%p, " - "expected 0x%p (GTT map 0x%p-0x%p)\n", - i, *vram_start, gtt_start, gtt_map, - gtt_end); + "expected 0x%p (GTT/VRAM offset " + "0x%16llx/0x%16llx)\n", + i, *vram_start, gtt_start, + (unsigned long long) + (gtt_addr - rdev->mc.gtt_start + + (void*)gtt_start - gtt_map), + (unsigned long long) + (vram_addr - rdev->mc.vram_start + + (void*)gtt_start - gtt_map)); radeon_bo_kunmap(vram_obj); goto out_cleanup; } @@ -179,9 +185,15 @@ void radeon_test_moves(struct radeon_device *rdev) gtt_start++, vram_start++) { if (*gtt_start != vram_start) { DRM_ERROR("Incorrect VRAM->GTT copy %d: Got 0x%p, " - "expected 0x%p (VRAM map 0x%p-0x%p)\n", - i, *gtt_start, vram_start, vram_map, - vram_end); + "expected 0x%p (VRAM/GTT offset " + "0x%16llx/0x%16llx)\n", + i, *gtt_start, vram_start, + (unsigned long long) + (vram_addr - rdev->mc.vram_start + + (void*)vram_start - vram_map), + (unsigned long long) + (gtt_addr - rdev->mc.gtt_start + + (void*)vram_start - vram_map)); radeon_bo_kunmap(gtt_obj[i]); goto out_cleanup; } -- cgit v1.2.3 From ba95c45a78d57ac05bf45d81b92a6ec4d299695d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michel=20D=C3=A4nzer?= Date: Fri, 19 Aug 2011 15:24:18 +0000 Subject: drm/radeon: Make vramlimit parameter actually work. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Michel Dänzer Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_device.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index a3b011b49465..b51e15725c6e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -301,6 +301,8 @@ void radeon_vram_location(struct radeon_device *rdev, struct radeon_mc *mc, u64 mc->mc_vram_size = mc->aper_size; } mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + if (radeon_vram_limit && radeon_vram_limit < mc->real_vram_size) + mc->real_vram_size = radeon_vram_limit; dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", mc->mc_vram_size >> 20, mc->vram_start, mc->vram_end, mc->real_vram_size >> 20); -- cgit v1.2.3 From 4f41adfd8ce9ff84fa9e968e0fe2854e9bc6fcb0 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 20 Aug 2011 10:23:38 +0100 Subject: ASoC: WM8996 record paths need AIFCLK Make AIFCLK supply the record paths otherwise record will not work unless there is a simultaneous playback. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index ab8e9d1aaff0..85f43b62743b 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -1213,6 +1213,16 @@ static const struct snd_soc_dapm_route wm8996_dapm_routes[] = { { "AIF2RX0", NULL, "AIFCLK" }, { "AIF2RX1", NULL, "AIFCLK" }, + { "AIF1TX0", NULL, "AIFCLK" }, + { "AIF1TX1", NULL, "AIFCLK" }, + { "AIF1TX2", NULL, "AIFCLK" }, + { "AIF1TX3", NULL, "AIFCLK" }, + { "AIF1TX4", NULL, "AIFCLK" }, + { "AIF1TX5", NULL, "AIFCLK" }, + + { "AIF2TX0", NULL, "AIFCLK" }, + { "AIF2TX1", NULL, "AIFCLK" }, + { "DSP1RXL", NULL, "SYSDSPCLK" }, { "DSP1RXR", NULL, "SYSDSPCLK" }, { "DSP2RXL", NULL, "SYSDSPCLK" }, -- cgit v1.2.3 From 7691cd74c5d6b173e1483277fb70d7798e97d2fa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 20 Aug 2011 16:59:27 +0100 Subject: ASoC: Fix configuration of WM8996 input enables There's no need for separate widgets for the enables (as the map already shows). Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index 85f43b62743b..e76d4edb67b2 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -988,15 +988,10 @@ SND_SOC_DAPM_MICBIAS("MICB1", WM8996_POWER_MANAGEMENT_1, 8, 0), SND_SOC_DAPM_PGA("IN1L PGA", WM8996_POWER_MANAGEMENT_2, 5, 0, NULL, 0), SND_SOC_DAPM_PGA("IN1R PGA", WM8996_POWER_MANAGEMENT_2, 4, 0, NULL, 0), -SND_SOC_DAPM_MUX("IN1L Mux", SND_SOC_NOPM, 0, 0, &in1_mux), -SND_SOC_DAPM_MUX("IN1R Mux", SND_SOC_NOPM, 0, 0, &in1_mux), -SND_SOC_DAPM_MUX("IN2L Mux", SND_SOC_NOPM, 0, 0, &in2_mux), -SND_SOC_DAPM_MUX("IN2R Mux", SND_SOC_NOPM, 0, 0, &in2_mux), - -SND_SOC_DAPM_PGA("IN1L", WM8996_POWER_MANAGEMENT_7, 2, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN1R", WM8996_POWER_MANAGEMENT_7, 3, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN2L", WM8996_POWER_MANAGEMENT_7, 6, 0, NULL, 0), -SND_SOC_DAPM_PGA("IN2R", WM8996_POWER_MANAGEMENT_7, 7, 0, NULL, 0), +SND_SOC_DAPM_MUX("IN1L Mux", WM8996_POWER_MANAGEMENT_7, 2, 0, &in1_mux), +SND_SOC_DAPM_MUX("IN1R Mux", WM8996_POWER_MANAGEMENT_7, 3, 0, &in1_mux), +SND_SOC_DAPM_MUX("IN2L Mux", WM8996_POWER_MANAGEMENT_7, 6, 0, &in2_mux), +SND_SOC_DAPM_MUX("IN2R Mux", WM8996_POWER_MANAGEMENT_7, 7, 0, &in2_mux), SND_SOC_DAPM_SUPPLY("DMIC2", WM8996_POWER_MANAGEMENT_7, 9, 0, NULL, 0), SND_SOC_DAPM_SUPPLY("DMIC1", WM8996_POWER_MANAGEMENT_7, 8, 0, NULL, 0), -- cgit v1.2.3 From f79e7ff85223e054c2967820d3be1c125a903bd4 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 21 Aug 2011 12:20:00 +0100 Subject: ASoC: Ensure we only run Speyside WM8962 bias level callbacks once We get called once per DAPM context but only need to run once. When DAPM was serialized this was a series of noops but now it can run in parallel we need to take proper care. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/samsung/speyside_wm8962.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/soc/samsung/speyside_wm8962.c b/sound/soc/samsung/speyside_wm8962.c index 0b9eb5f7ec4c..72535f2daaf2 100644 --- a/sound/soc/samsung/speyside_wm8962.c +++ b/sound/soc/samsung/speyside_wm8962.c @@ -23,6 +23,9 @@ static int speyside_wm8962_set_bias_level(struct snd_soc_card *card, struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai; int ret; + if (dapm->dev != codec_dai->dev) + return 0; + switch (level) { case SND_SOC_BIAS_PREPARE: if (dapm->bias_level == SND_SOC_BIAS_STANDBY) { @@ -57,6 +60,9 @@ static int speyside_wm8962_set_bias_level_post(struct snd_soc_card *card, struct snd_soc_dai *codec_dai = card->rtd[0].codec_dai; int ret; + if (dapm->dev != codec_dai->dev) + return 0; + switch (level) { case SND_SOC_BIAS_STANDBY: ret = snd_soc_dai_set_sysclk(codec_dai, WM8962_SYSCLK_MCLK, -- cgit v1.2.3 From 4df0cb2fa977f99963b616487a22ebd021ea5463 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sun, 21 Aug 2011 17:18:52 +0100 Subject: ASoC: Clear any outstanding WM8962 FLL lock completions before waiting Ensure that we don't spuriously trigger early. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8962.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 28650edfdebb..1725550c293e 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -2221,6 +2221,8 @@ static int sysclk_event(struct snd_soc_dapm_widget *w, switch (event) { case SND_SOC_DAPM_PRE_PMU: if (fll) { + try_wait_for_completion(&wm8962->fll_lock); + snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1, WM8962_FLL_ENA, WM8962_FLL_ENA); if (wm8962->irq) { @@ -3284,6 +3286,8 @@ static int wm8962_set_fll(struct snd_soc_codec *codec, int fll_id, int source, snd_soc_write(codec, WM8962_FLL_CONTROL_7, fll_div.lambda); snd_soc_write(codec, WM8962_FLL_CONTROL_8, fll_div.n); + try_wait_for_completion(&wm8962->fll_lock); + snd_soc_update_bits(codec, WM8962_FLL_CONTROL_1, WM8962_FLL_FRAC | WM8962_FLL_REFCLK_SRC_MASK | WM8962_FLL_ENA, fll1); -- cgit v1.2.3 From a41619455c0e28b6973471e87f1702c6129d3439 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 16 Aug 2011 16:57:58 +0900 Subject: ASoC: Clear completions from late WM8996 FLL lock IRQs In case we have a pending completion, for example due to a problem with the input clock which got corrected after we timed out. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index e76d4edb67b2..0936ae5e3749 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -2111,6 +2111,9 @@ static int wm8996_set_fll(struct snd_soc_codec *codec, int fll_id, int source, snd_soc_write(codec, WM8996_FLL_EFS_1, fll_div.lambda); + /* Clear any pending completions (eg, from failed startups) */ + try_wait_for_completion(&wm8996->fll_lock); + snd_soc_update_bits(codec, WM8996_FLL_CONTROL_1, WM8996_FLL_ENA, WM8996_FLL_ENA); -- cgit v1.2.3 From 0d6cfa3a75f5cde5b3ca0dde748fd22625b4f34c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eric=20B=C3=A9nard?= Date: Mon, 22 Aug 2011 15:41:46 +0100 Subject: ARM: 7051/1: cpuimx* boards: fix mach-types errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I made some changes to the entry in the ARM Machine Registry after submission which was the wrong thing to do. This patch should help to fix this error. Signed-off-by: Eric Bénard Signed-off-by: Russell King --- arch/arm/mach-imx/mach-cpuimx27.c | 2 +- arch/arm/mach-imx/mach-cpuimx35.c | 2 +- arch/arm/mach-imx/mach-eukrea_cpuimx25.c | 2 +- arch/arm/tools/mach-types | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-imx/mach-cpuimx27.c b/arch/arm/mach-imx/mach-cpuimx27.c index 87887ac5806b..f851fe903687 100644 --- a/arch/arm/mach-imx/mach-cpuimx27.c +++ b/arch/arm/mach-imx/mach-cpuimx27.c @@ -310,7 +310,7 @@ static struct sys_timer eukrea_cpuimx27_timer = { .init = eukrea_cpuimx27_timer_init, }; -MACHINE_START(CPUIMX27, "EUKREA CPUIMX27") +MACHINE_START(EUKREA_CPUIMX27, "EUKREA CPUIMX27") .boot_params = MX27_PHYS_OFFSET + 0x100, .map_io = mx27_map_io, .init_early = imx27_init_early, diff --git a/arch/arm/mach-imx/mach-cpuimx35.c b/arch/arm/mach-imx/mach-cpuimx35.c index f39a478ba1a6..4bd083ba9af2 100644 --- a/arch/arm/mach-imx/mach-cpuimx35.c +++ b/arch/arm/mach-imx/mach-cpuimx35.c @@ -192,7 +192,7 @@ struct sys_timer eukrea_cpuimx35_timer = { .init = eukrea_cpuimx35_timer_init, }; -MACHINE_START(EUKREA_CPUIMX35, "Eukrea CPUIMX35") +MACHINE_START(EUKREA_CPUIMX35SD, "Eukrea CPUIMX35") /* Maintainer: Eukrea Electromatique */ .boot_params = MX3x_PHYS_OFFSET + 0x100, .map_io = mx35_map_io, diff --git a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c index da36da52969d..2442d5da883d 100644 --- a/arch/arm/mach-imx/mach-eukrea_cpuimx25.c +++ b/arch/arm/mach-imx/mach-eukrea_cpuimx25.c @@ -161,7 +161,7 @@ static struct sys_timer eukrea_cpuimx25_timer = { .init = eukrea_cpuimx25_timer_init, }; -MACHINE_START(EUKREA_CPUIMX25, "Eukrea CPUIMX25") +MACHINE_START(EUKREA_CPUIMX25SD, "Eukrea CPUIMX25") /* Maintainer: Eukrea Electromatique */ .boot_params = MX25_PHYS_OFFSET + 0x100, .map_io = mx25_map_io, diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types index fff68d0d521b..62cc8f981171 100644 --- a/arch/arm/tools/mach-types +++ b/arch/arm/tools/mach-types @@ -351,7 +351,7 @@ centro MACH_CENTRO CENTRO 1944 nokia_rx51 MACH_NOKIA_RX51 NOKIA_RX51 1955 omap_zoom2 MACH_OMAP_ZOOM2 OMAP_ZOOM2 1967 cpuat9260 MACH_CPUAT9260 CPUAT9260 1973 -eukrea_cpuimx27 MACH_CPUIMX27 CPUIMX27 1975 +eukrea_cpuimx27 MACH_EUKREA_CPUIMX27 EUKREA_CPUIMX27 1975 acs5k MACH_ACS5K ACS5K 1982 snapper_9260 MACH_SNAPPER_9260 SNAPPER_9260 1987 dsm320 MACH_DSM320 DSM320 1988 @@ -476,8 +476,8 @@ cns3420vb MACH_CNS3420VB CNS3420VB 2776 omap4_panda MACH_OMAP4_PANDA OMAP4_PANDA 2791 ti8168evm MACH_TI8168EVM TI8168EVM 2800 teton_bga MACH_TETON_BGA TETON_BGA 2816 -eukrea_cpuimx25sd MACH_EUKREA_CPUIMX25 EUKREA_CPUIMX25 2820 -eukrea_cpuimx35sd MACH_EUKREA_CPUIMX35 EUKREA_CPUIMX35 2821 +eukrea_cpuimx25sd MACH_EUKREA_CPUIMX25SD EUKREA_CPUIMX25SD 2820 +eukrea_cpuimx35sd MACH_EUKREA_CPUIMX35SD EUKREA_CPUIMX35SD 2821 eukrea_cpuimx51sd MACH_EUKREA_CPUIMX51SD EUKREA_CPUIMX51SD 2822 eukrea_cpuimx51 MACH_EUKREA_CPUIMX51 EUKREA_CPUIMX51 2823 smdkc210 MACH_SMDKC210 SMDKC210 2838 -- cgit v1.2.3 From 3c05c4bed4ccce3f22f6d7899b308faae24ad198 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 17 Aug 2011 15:15:00 +0200 Subject: xen: Do not enable PV IPIs when vector callback not present Fix regression for HVM case on older (<4.1.1) hypervisors caused by commit 99bbb3a84a99cd04ab16b998b20f01a72cfa9f4f Author: Stefano Stabellini Date: Thu Dec 2 17:55:10 2010 +0000 xen: PV on HVM: support PV spinlocks and IPIs This change replaced the SMP operations with event based handlers without taking into account that this only works when the hypervisor supports callback vectors. This causes unexplainable hangs early on boot for HVM guests with more than one CPU. BugLink: http://bugs.launchpad.net/bugs/791850 CC: stable@kernel.org Signed-off-by: Stefan Bader Signed-off-by: Stefano Stabellini Tested-and-Reported-by: Stefan Bader Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index b4533a86d7e4..e79dbb95482b 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -521,8 +521,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) native_smp_prepare_cpus(max_cpus); WARN_ON(xen_smp_intr_init(0)); - if (!xen_have_vector_callback) - return; xen_init_lock_cpu(0); xen_init_spinlocks(); } @@ -546,6 +544,8 @@ static void xen_hvm_cpu_die(unsigned int cpu) void __init xen_hvm_smp_init(void) { + if (!xen_have_vector_callback) + return; smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; smp_ops.smp_send_reschedule = xen_smp_send_reschedule; smp_ops.cpu_up = xen_hvm_cpu_up; -- cgit v1.2.3 From 60c5f08e154fd235056645e050f2cd5671b19125 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 11 Aug 2011 13:17:20 -0700 Subject: xen/tracing: Fix tracing config option properly Steven Rostedt says we should use CONFIG_EVENT_TRACING. Cc:Steven Rostedt Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3326204e251f..add2c2d729ce 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ grant-table.o suspend.o platform-pci-unplug.o \ p2m.o -obj-$(CONFIG_FTRACE) += trace.o +obj-$(CONFIG_EVENT_TRACING) += trace.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o -- cgit v1.2.3 From 6f5986bce558e64fe867bff600a2127a3cb0c006 Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 15 Aug 2011 12:51:31 +0800 Subject: xen-blkback: Don't disconnect backend until state switched to XenbusStateClosed. When do block-attach/block-detach test with below steps, umount hangs in the guest. Furthermore shutdown ends up being stuck when umounting file-systems. 1. start guest. 2. attach new block device by xm block-attach in Dom0. 3. mount new disk in guest. 4. execute xm block-detach to detach the block device in dom0 until timeout 5. Any request to the disk will hung. Root cause: This issue is caused when setting backend device's state to 'XenbusStateClosing', which sends to the frontend the XenbusStateClosing notification. When frontend receives the notification it tries to release the disk in blkfront_closing(), but at that moment the disk is still in use by guest, so frontend refuses to close. Specifically it sets the disk state to XenbusStateClosing and sends the notification to backend - when backend receives the event, it disconnects the vbd from real device, and sets the vbd device state to XenbusStateClosing. The backend disconnects the real device/file, and any IO requests to the disk in guest will end up in ether, leaving disk DEAD and set to XenbusStateClosing. When the guest wants to disconnect the disk, umount will hang on blkif_release()->xlvbd_release_gendisk() as it is unable to send any IO to the disk, which prevents clean system shutdown. Solution: Don't disconnect backend until frontend state switched to XenbusStateClosed. Signed-off-by: Joe Jin Cc: Daniel Stodden Cc: Jens Axboe Cc: Annie Li Cc: Ian Campbell [v1: Modified description a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/xenbus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 6cc0db1bf522..7be3d0fe8ad3 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -601,11 +601,11 @@ static void frontend_changed(struct xenbus_device *dev, break; case XenbusStateClosing: - xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosing); break; case XenbusStateClosed: + xen_blkif_disconnect(be->blkif); xenbus_switch_state(dev, XenbusStateClosed); if (xenbus_dev_is_online(dev)) break; -- cgit v1.2.3 From 1bc05b0ae6448b20d46076899e0cc12ad999e50e Mon Sep 17 00:00:00 2001 From: Joe Jin Date: Mon, 15 Aug 2011 12:57:07 +0800 Subject: xen-blkback: fixed indentation and comments This patch fixes belows: 1. Fix code style issue. 2. Fix incorrect functions name in comments. Signed-off-by: Joe Jin Cc: Jens Axboe Cc: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/block/xen-blkback/common.h | 2 +- drivers/block/xen-blkback/xenbus.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h index 9e40b283a468..00c57c90e2d6 100644 --- a/drivers/block/xen-blkback/common.h +++ b/drivers/block/xen-blkback/common.h @@ -46,7 +46,7 @@ #define DRV_PFX "xen-blkback:" #define DPRINTK(fmt, args...) \ - pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ + pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \ __func__, __LINE__, ##args) diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 7be3d0fe8ad3..a96cb5f893a8 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -590,7 +590,7 @@ static void frontend_changed(struct xenbus_device *dev, /* * Enforce precondition before potential leak point. - * blkif_disconnect() is idempotent. + * xen_blkif_disconnect() is idempotent. */ xen_blkif_disconnect(be->blkif); @@ -611,7 +611,7 @@ static void frontend_changed(struct xenbus_device *dev, break; /* fall through if not online */ case XenbusStateUnknown: - /* implies blkif_disconnect() via blkback_remove() */ + /* implies xen_blkif_disconnect() via xen_blkbk_remove() */ device_unregister(&dev->dev); break; -- cgit v1.2.3 From 5b9063b19caaffe7135e1f9b8b22174ded0f586b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Sun, 21 Aug 2011 21:04:12 -0700 Subject: Input: ad714xx-spi - force SPI bus into the default 8-bit mode Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-spi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 4120dd549305..da83ac9bed7e 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -54,6 +54,12 @@ static int ad714x_spi_write(struct device *dev, unsigned short reg, static int __devinit ad714x_spi_probe(struct spi_device *spi) { struct ad714x_chip *chip; + int err; + + spi->bits_per_word = 8; + err = spi_setup(spi); + if (err < 0) + return err; chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq, ad714x_spi_read, ad714x_spi_write); -- cgit v1.2.3 From 6337de2204be3b7b40825a1d30de30e514e8947b Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Sun, 21 Aug 2011 21:04:12 -0700 Subject: Input: ad714x - fix endianness issues Allow driver to be used on Big Endian boxes. Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 34 ++++++++++------------------------ drivers/input/misc/ad714x-spi.c | 24 +++++++++++++++--------- 2 files changed, 25 insertions(+), 33 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index e21deb1baa8a..00a6a223212a 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -32,17 +32,12 @@ static int ad714x_i2c_write(struct device *dev, unsigned short reg, { struct i2c_client *client = to_i2c_client(dev); int ret = 0; - u8 *_reg = (u8 *)® - u8 *_data = (u8 *)&data; - - u8 tx[4] = { - _reg[1], - _reg[0], - _data[1], - _data[0] + unsigned short tx[2] = { + cpu_to_be16(reg), + cpu_to_be16(data) }; - ret = i2c_master_send(client, tx, 4); + ret = i2c_master_send(client, (u8 *)tx, 4); if (ret < 0) dev_err(&client->dev, "I2C write error\n"); @@ -54,25 +49,16 @@ static int ad714x_i2c_read(struct device *dev, unsigned short reg, { struct i2c_client *client = to_i2c_client(dev); int ret = 0; - u8 *_reg = (u8 *)® - u8 *_data = (u8 *)data; + unsigned short tx = cpu_to_be16(reg); - u8 tx[2] = { - _reg[1], - _reg[0] - }; - u8 rx[2]; - - ret = i2c_master_send(client, tx, 2); + ret = i2c_master_send(client, (u8 *)&tx, 2); if (ret >= 0) - ret = i2c_master_recv(client, rx, 2); + ret = i2c_master_recv(client, (u8 *)data, 2); - if (unlikely(ret < 0)) { + if (unlikely(ret < 0)) dev_err(&client->dev, "I2C read error\n"); - } else { - _data[0] = rx[1]; - _data[1] = rx[0]; - } + else + *data = be16_to_cpu(*data); return ret; } diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index da83ac9bed7e..0c7f9488f5cb 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -6,7 +6,7 @@ * Licensed under the GPL-2 or later. */ -#include /* BUS_I2C */ +#include /* BUS_SPI */ #include #include #include @@ -30,22 +30,28 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); -static int ad714x_spi_read(struct device *dev, unsigned short reg, - unsigned short *data) +static int ad714x_spi_read(struct device *dev, + unsigned short reg, unsigned short *data) { struct spi_device *spi = to_spi_device(dev); - unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg; + unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX | + AD714x_SPI_READ | reg); + int ret; - return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2); + ret = spi_write_then_read(spi, &tx, 2, data, 2); + + *data = be16_to_cpup(data); + + return ret; } -static int ad714x_spi_write(struct device *dev, unsigned short reg, - unsigned short data) +static int ad714x_spi_write(struct device *dev, + unsigned short reg, unsigned short data) { struct spi_device *spi = to_spi_device(dev); unsigned short tx[2] = { - AD714x_SPI_CMD_PREFIX | reg, - data + cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg), + cpu_to_be16(data) }; return spi_write(spi, (u8 *)tx, 4); -- cgit v1.2.3 From c0409feb86893f5ccf73964c7b2b47ca64bdb014 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 22 Aug 2011 09:45:39 -0700 Subject: Input: ad714x - use DMA-safe buffers for spi_write() spi_write() requires use of DMA-safe (cacheline aligned) buffers. Also use the same buffers when reading data since to avoid extra locking and potential memory allocation in spi_write_then_read(). Acked-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 60 ++++++++++++++------------- drivers/input/misc/ad714x-spi.c | 51 ++++++++++++++++------- drivers/input/misc/ad714x.c | 90 ++++++++++++++--------------------------- drivers/input/misc/ad714x.h | 33 ++++++++++++++- 4 files changed, 131 insertions(+), 103 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index 00a6a223212a..6c6121865f0e 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -27,40 +27,46 @@ static int ad714x_i2c_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_i2c_pm, ad714x_i2c_suspend, ad714x_i2c_resume); -static int ad714x_i2c_write(struct device *dev, unsigned short reg, - unsigned short data) +static int ad714x_i2c_write(struct ad714x_chip *chip, + unsigned short reg, unsigned short data) { - struct i2c_client *client = to_i2c_client(dev); - int ret = 0; - unsigned short tx[2] = { - cpu_to_be16(reg), - cpu_to_be16(data) - }; - - ret = i2c_master_send(client, (u8 *)tx, 4); - if (ret < 0) - dev_err(&client->dev, "I2C write error\n"); - - return ret; + struct i2c_client *client = to_i2c_client(chip->dev); + int error; + + chip->xfer_buf[0] = cpu_to_be16(reg); + chip->xfer_buf[1] = cpu_to_be16(data); + + error = i2c_master_send(client, (u8 *)chip->xfer_buf, + 2 * sizeof(*chip->xfer_buf)); + if (unlikely(error < 0)) { + dev_err(&client->dev, "I2C write error: %d\n", error); + return error; + } + + return 0; } -static int ad714x_i2c_read(struct device *dev, unsigned short reg, - unsigned short *data) +static int ad714x_i2c_read(struct ad714x_chip *chip, + unsigned short reg, unsigned short *data) { - struct i2c_client *client = to_i2c_client(dev); - int ret = 0; - unsigned short tx = cpu_to_be16(reg); + struct i2c_client *client = to_i2c_client(chip->dev); + int error; + + chip->xfer_buf[0] = cpu_to_be16(reg); - ret = i2c_master_send(client, (u8 *)&tx, 2); - if (ret >= 0) - ret = i2c_master_recv(client, (u8 *)data, 2); + error = i2c_master_send(client, (u8 *)chip->xfer_buf, + sizeof(*chip->xfer_buf)); + if (error >= 0) + error = i2c_master_recv(client, (u8 *)chip->xfer_buf, + sizeof(*chip->xfer_buf)); - if (unlikely(ret < 0)) - dev_err(&client->dev, "I2C read error\n"); - else - *data = be16_to_cpu(*data); + if (unlikely(error < 0)) { + dev_err(&client->dev, "I2C read error: %d\n", error); + return error; + } - return ret; + *data = be16_to_cpup(chip->xfer_buf); + return 0; } static int __devinit ad714x_i2c_probe(struct i2c_client *client, diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 0c7f9488f5cb..306577dc0b98 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -30,31 +30,54 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); -static int ad714x_spi_read(struct device *dev, +static int ad714x_spi_read(struct ad714x_chip *chip, unsigned short reg, unsigned short *data) { - struct spi_device *spi = to_spi_device(dev); - unsigned short tx = cpu_to_be16(AD714x_SPI_CMD_PREFIX | + struct spi_device *spi = to_spi_device(chip->dev); + struct spi_message message; + struct spi_transfer xfer[2]; + int error; + + spi_message_init(&message); + memset(xfer, 0, sizeof(xfer)); + + chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg); - int ret; + xfer[0].tx_buf = &chip->xfer_buf[0]; + xfer[0].len = sizeof(chip->xfer_buf[0]); + spi_message_add_tail(&xfer[0], &message); - ret = spi_write_then_read(spi, &tx, 2, data, 2); + xfer[1].rx_buf = &chip->xfer_buf[1]; + xfer[1].len = sizeof(chip->xfer_buf[1]); + spi_message_add_tail(&xfer[1], &message); - *data = be16_to_cpup(data); + error = spi_sync(spi, &message); + if (unlikely(error)) { + dev_err(chip->dev, "SPI read error: %d\n", error); + return error; + } - return ret; + *data = be16_to_cpu(chip->xfer_buf[1]); + return 0; } -static int ad714x_spi_write(struct device *dev, +static int ad714x_spi_write(struct ad714x_chip *chip, unsigned short reg, unsigned short data) { - struct spi_device *spi = to_spi_device(dev); - unsigned short tx[2] = { - cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg), - cpu_to_be16(data) - }; + struct spi_device *spi = to_spi_device(chip->dev); + int error; - return spi_write(spi, (u8 *)tx, 4); + chip->xfer_buf[0] = cpu_to_be16(AD714x_SPI_CMD_PREFIX | reg); + chip->xfer_buf[1] = cpu_to_be16(data); + + error = spi_write(spi, (u8 *)chip->xfer_buf, + 2 * sizeof(*chip->xfer_buf)); + if (unlikely(error)) { + dev_err(chip->dev, "SPI write error: %d\n", error); + return error; + } + + return 0; } static int __devinit ad714x_spi_probe(struct spi_device *spi) diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c index c3a62c42cd28..2be0366c8123 100644 --- a/drivers/input/misc/ad714x.c +++ b/drivers/input/misc/ad714x.c @@ -59,7 +59,6 @@ #define STAGE11_AMBIENT 0x27D #define PER_STAGE_REG_NUM 36 -#define STAGE_NUM 12 #define STAGE_CFGREG_NUM 8 #define SYS_CFGREG_NUM 8 @@ -124,28 +123,6 @@ struct ad714x_driver_data { * information to integrate all things which will be private data * of spi/i2c device */ -struct ad714x_chip { - unsigned short h_state; - unsigned short l_state; - unsigned short c_state; - unsigned short adc_reg[STAGE_NUM]; - unsigned short amb_reg[STAGE_NUM]; - unsigned short sensor_val[STAGE_NUM]; - - struct ad714x_platform_data *hw; - struct ad714x_driver_data *sw; - - int irq; - struct device *dev; - ad714x_read_t read; - ad714x_write_t write; - - struct mutex mutex; - - unsigned product; - unsigned version; -}; - static void ad714x_use_com_int(struct ad714x_chip *ad714x, int start_stage, int end_stage) { @@ -154,13 +131,13 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); data |= 1 << end_stage; - ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data); + ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); data &= ~mask; - ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data); + ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } static void ad714x_use_thr_int(struct ad714x_chip *ad714x, @@ -171,13 +148,13 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); data &= ~(1 << end_stage); - ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data); + ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); data |= mask; - ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data); + ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x, @@ -274,10 +251,8 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] - @@ -445,10 +420,8 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) ad714x->sensor_val[i] = ad714x->adc_reg[i] - @@ -598,10 +571,8 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx) int i; for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) { - ad714x->read(ad714x->dev, CDC_RESULT_S0 + i, - &ad714x->adc_reg[i]); - ad714x->read(ad714x->dev, - STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, + ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); + ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, &ad714x->amb_reg[i]); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) ad714x->sensor_val[i] = ad714x->adc_reg[i] - @@ -891,7 +862,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x) { unsigned short data; - ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data); + ad714x->read(ad714x, AD714X_PARTID_REG, &data); switch (data & 0xFFF0) { case AD7142_PARTID: ad714x->product = 0x7142; @@ -940,23 +911,22 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x) for (i = 0; i < STAGE_NUM; i++) { reg_base = AD714X_STAGECFG_REG + i * STAGE_CFGREG_NUM; for (j = 0; j < STAGE_CFGREG_NUM; j++) - ad714x->write(ad714x->dev, reg_base + j, + ad714x->write(ad714x, reg_base + j, ad714x->hw->stage_cfg_reg[i][j]); } for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + i, + ad714x->write(ad714x, AD714X_SYSCFG_REG + i, ad714x->hw->sys_cfg_reg[i]); for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + i, - &data); + ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data); - ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF); + ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF); /* clear all interrupts */ - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); } static irqreturn_t ad714x_interrupt_thread(int irq, void *data) @@ -966,9 +936,9 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data) mutex_lock(&ad714x->mutex); - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &ad714x->l_state); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &ad714x->h_state); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &ad714x->c_state); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state); for (i = 0; i < ad714x->hw->button_num; i++) ad714x_button_state_machine(ad714x, i); @@ -1245,7 +1215,7 @@ int ad714x_disable(struct ad714x_chip *ad714x) mutex_lock(&ad714x->mutex); data = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL] | 0x3; - ad714x->write(ad714x->dev, AD714X_PWR_CTRL, data); + ad714x->write(ad714x, AD714X_PWR_CTRL, data); mutex_unlock(&ad714x->mutex); @@ -1263,16 +1233,16 @@ int ad714x_enable(struct ad714x_chip *ad714x) /* resume to non-shutdown mode */ - ad714x->write(ad714x->dev, AD714X_PWR_CTRL, + ad714x->write(ad714x, AD714X_PWR_CTRL, ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]); /* make sure the interrupt output line is not low level after resume, * otherwise we will get no chance to enter falling-edge irq again */ - ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); + ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); mutex_unlock(&ad714x->mutex); diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h index 45c54fb13f07..d12d14911fc3 100644 --- a/drivers/input/misc/ad714x.h +++ b/drivers/input/misc/ad714x.h @@ -11,11 +11,40 @@ #include +#define STAGE_NUM 12 + struct device; +struct ad714x_platform_data; +struct ad714x_driver_data; struct ad714x_chip; -typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *); -typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short); +typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *); +typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short); + +struct ad714x_chip { + unsigned short h_state; + unsigned short l_state; + unsigned short c_state; + unsigned short adc_reg[STAGE_NUM]; + unsigned short amb_reg[STAGE_NUM]; + unsigned short sensor_val[STAGE_NUM]; + + struct ad714x_platform_data *hw; + struct ad714x_driver_data *sw; + + int irq; + struct device *dev; + ad714x_read_t read; + ad714x_write_t write; + + struct mutex mutex; + + unsigned product; + unsigned version; + + __be16 xfer_buf[16] ____cacheline_aligned; + +}; int ad714x_disable(struct ad714x_chip *ad714x); int ad714x_enable(struct ad714x_chip *ad714x); -- cgit v1.2.3 From 9eff794b777ac9ca034129a1b637204000c8fb29 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Mon, 22 Aug 2011 09:45:42 -0700 Subject: Input: ad714x - read the interrupt status registers in a row The interrupt status registers should be read in row to avoid invalid data. Alter "read" method for both bus options to allow reading several registers in a row and make sure we read interrupt status registers properly. Read sequence saves 50% of bus transactions compared to single register reads. So use it also for the result registers, which are also located in a row. Also update copyright notice. Signed-off-by: Michael Hennerich Signed-off-by: Dmitry Torokhov --- drivers/input/misc/ad714x-i2c.c | 11 +++++--- drivers/input/misc/ad714x-spi.c | 11 +++++--- drivers/input/misc/ad714x.c | 62 +++++++++++++++++++++-------------------- drivers/input/misc/ad714x.h | 6 ++-- 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c index 6c6121865f0e..025417d74ca2 100644 --- a/drivers/input/misc/ad714x-i2c.c +++ b/drivers/input/misc/ad714x-i2c.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (I2C bus) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -47,9 +47,10 @@ static int ad714x_i2c_write(struct ad714x_chip *chip, } static int ad714x_i2c_read(struct ad714x_chip *chip, - unsigned short reg, unsigned short *data) + unsigned short reg, unsigned short *data, size_t len) { struct i2c_client *client = to_i2c_client(chip->dev); + int i; int error; chip->xfer_buf[0] = cpu_to_be16(reg); @@ -58,14 +59,16 @@ static int ad714x_i2c_read(struct ad714x_chip *chip, sizeof(*chip->xfer_buf)); if (error >= 0) error = i2c_master_recv(client, (u8 *)chip->xfer_buf, - sizeof(*chip->xfer_buf)); + len * sizeof(*chip->xfer_buf)); if (unlikely(error < 0)) { dev_err(&client->dev, "I2C read error: %d\n", error); return error; } - *data = be16_to_cpup(chip->xfer_buf); + for (i = 0; i < len; i++) + data[i] = be16_to_cpu(chip->xfer_buf[i]); + return 0; } diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c index 306577dc0b98..875b50811361 100644 --- a/drivers/input/misc/ad714x-spi.c +++ b/drivers/input/misc/ad714x-spi.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (SPI bus) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -31,11 +31,12 @@ static int ad714x_spi_resume(struct device *dev) static SIMPLE_DEV_PM_OPS(ad714x_spi_pm, ad714x_spi_suspend, ad714x_spi_resume); static int ad714x_spi_read(struct ad714x_chip *chip, - unsigned short reg, unsigned short *data) + unsigned short reg, unsigned short *data, size_t len) { struct spi_device *spi = to_spi_device(chip->dev); struct spi_message message; struct spi_transfer xfer[2]; + int i; int error; spi_message_init(&message); @@ -48,7 +49,7 @@ static int ad714x_spi_read(struct ad714x_chip *chip, spi_message_add_tail(&xfer[0], &message); xfer[1].rx_buf = &chip->xfer_buf[1]; - xfer[1].len = sizeof(chip->xfer_buf[1]); + xfer[1].len = sizeof(chip->xfer_buf[1]) * len; spi_message_add_tail(&xfer[1], &message); error = spi_sync(spi, &message); @@ -57,7 +58,9 @@ static int ad714x_spi_read(struct ad714x_chip *chip, return error; } - *data = be16_to_cpu(chip->xfer_buf[1]); + for (i = 0; i < len; i++) + data[i] = be16_to_cpu(chip->xfer_buf[i + 1]); + return 0; } diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c index 2be0366c8123..ca42c7d2a3c7 100644 --- a/drivers/input/misc/ad714x.c +++ b/drivers/input/misc/ad714x.c @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver supporting AD7142/3/7/8/7A * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -123,6 +123,7 @@ struct ad714x_driver_data { * information to integrate all things which will be private data * of spi/i2c device */ + static void ad714x_use_com_int(struct ad714x_chip *ad714x, int start_stage, int end_stage) { @@ -131,11 +132,11 @@ static void ad714x_use_com_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1); data |= 1 << end_stage; ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1); data &= ~mask; ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } @@ -148,11 +149,11 @@ static void ad714x_use_thr_int(struct ad714x_chip *ad714x, mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1); - ad714x->read(ad714x, STG_COM_INT_EN_REG, &data); + ad714x->read(ad714x, STG_COM_INT_EN_REG, &data, 1); data &= ~(1 << end_stage); ad714x->write(ad714x, STG_COM_INT_EN_REG, data); - ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data); + ad714x->read(ad714x, STG_HIGH_INT_EN_REG, &data, 1); data |= mask; ad714x->write(ad714x, STG_HIGH_INT_EN_REG, data); } @@ -250,13 +251,16 @@ static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage, + &ad714x->adc_reg[hw->start_stage], + hw->end_stage - hw->start_stage + 1); + for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); - ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] - - ad714x->amb_reg[i]); + ad714x->sensor_val[i] = + abs(ad714x->adc_reg[i] - ad714x->amb_reg[i]); } } @@ -419,13 +423,16 @@ static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->start_stage, + &ad714x->adc_reg[hw->start_stage], + hw->end_stage - hw->start_stage + 1); + for (i = hw->start_stage; i <= hw->end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) - ad714x->sensor_val[i] = ad714x->adc_reg[i] - - ad714x->amb_reg[i]; + ad714x->sensor_val[i] = + ad714x->adc_reg[i] - ad714x->amb_reg[i]; else ad714x->sensor_val[i] = 0; } @@ -570,13 +577,16 @@ static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx) struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx]; int i; + ad714x->read(ad714x, CDC_RESULT_S0 + hw->x_start_stage, + &ad714x->adc_reg[hw->x_start_stage], + hw->x_end_stage - hw->x_start_stage + 1); + for (i = hw->x_start_stage; i <= hw->x_end_stage; i++) { - ad714x->read(ad714x, CDC_RESULT_S0 + i, &ad714x->adc_reg[i]); ad714x->read(ad714x, STAGE0_AMBIENT + i * PER_STAGE_REG_NUM, - &ad714x->amb_reg[i]); + &ad714x->amb_reg[i], 1); if (ad714x->adc_reg[i] > ad714x->amb_reg[i]) - ad714x->sensor_val[i] = ad714x->adc_reg[i] - - ad714x->amb_reg[i]; + ad714x->sensor_val[i] = + ad714x->adc_reg[i] - ad714x->amb_reg[i]; else ad714x->sensor_val[i] = 0; } @@ -862,7 +872,7 @@ static int ad714x_hw_detect(struct ad714x_chip *ad714x) { unsigned short data; - ad714x->read(ad714x, AD714X_PARTID_REG, &data); + ad714x->read(ad714x, AD714X_PARTID_REG, &data, 1); switch (data & 0xFFF0) { case AD7142_PARTID: ad714x->product = 0x7142; @@ -919,14 +929,12 @@ static void ad714x_hw_init(struct ad714x_chip *ad714x) ad714x->write(ad714x, AD714X_SYSCFG_REG + i, ad714x->hw->sys_cfg_reg[i]); for (i = 0; i < SYS_CFGREG_NUM; i++) - ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data); + ad714x->read(ad714x, AD714X_SYSCFG_REG + i, &data, 1); ad714x->write(ad714x, AD714X_STG_CAL_EN_REG, 0xFFF); /* clear all interrupts */ - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); } static irqreturn_t ad714x_interrupt_thread(int irq, void *data) @@ -936,9 +944,7 @@ static irqreturn_t ad714x_interrupt_thread(int irq, void *data) mutex_lock(&ad714x->mutex); - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &ad714x->h_state); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &ad714x->c_state); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); for (i = 0; i < ad714x->hw->button_num; i++) ad714x_button_state_machine(ad714x, i); @@ -1225,8 +1231,6 @@ EXPORT_SYMBOL(ad714x_disable); int ad714x_enable(struct ad714x_chip *ad714x) { - unsigned short data; - dev_dbg(ad714x->dev, "%s enter\n", __func__); mutex_lock(&ad714x->mutex); @@ -1240,9 +1244,7 @@ int ad714x_enable(struct ad714x_chip *ad714x) * otherwise we will get no chance to enter falling-edge irq again */ - ad714x->read(ad714x, STG_LOW_INT_STA_REG, &data); - ad714x->read(ad714x, STG_HIGH_INT_STA_REG, &data); - ad714x->read(ad714x, STG_COM_INT_STA_REG, &data); + ad714x->read(ad714x, STG_LOW_INT_STA_REG, &ad714x->l_state, 3); mutex_unlock(&ad714x->mutex); diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h index d12d14911fc3..3c85455aa66d 100644 --- a/drivers/input/misc/ad714x.h +++ b/drivers/input/misc/ad714x.h @@ -1,7 +1,7 @@ /* * AD714X CapTouch Programmable Controller driver (bus interfaces) * - * Copyright 2009 Analog Devices Inc. + * Copyright 2009-2011 Analog Devices Inc. * * Licensed under the GPL-2 or later. */ @@ -18,12 +18,12 @@ struct ad714x_platform_data; struct ad714x_driver_data; struct ad714x_chip; -typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *); +typedef int (*ad714x_read_t)(struct ad714x_chip *, unsigned short, unsigned short *, size_t); typedef int (*ad714x_write_t)(struct ad714x_chip *, unsigned short, unsigned short); struct ad714x_chip { - unsigned short h_state; unsigned short l_state; + unsigned short h_state; unsigned short c_state; unsigned short adc_reg[STAGE_NUM]; unsigned short amb_reg[STAGE_NUM]; -- cgit v1.2.3 From ecb4433550f0620f3d1471ae7099037ede30a91e Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 12 Aug 2011 14:00:59 +0200 Subject: mac80211: fix suspend/resume races with unregister hw Do not call ->suspend, ->resume methods after we unregister wiphy. Also delete sta_clanup timer after we finish wiphy unregister to avoid this: WARNING: at lib/debugobjects.c:262 debug_print_object+0x85/0xa0() Hardware name: 6369CTO ODEBUG: free active (active state 0) object type: timer_list hint: sta_info_cleanup+0x0/0x180 [mac80211] Modules linked in: aes_i586 aes_generic fuse bridge stp llc autofs4 sunrpc cpufreq_ondemand acpi_cpufreq mperf ext2 dm_mod uinput thinkpad_acpi hwmon sg arc4 rt2800usb rt2800lib crc_ccitt rt2x00usb rt2x00lib mac80211 cfg80211 i2c_i801 iTCO_wdt iTCO_vendor_support e1000e ext4 mbcache jbd2 sd_mod crc_t10dif sr_mod cdrom yenta_socket ahci libahci pata_acpi ata_generic ata_piix i915 drm_kms_helper drm i2c_algo_bit video [last unloaded: microcode] Pid: 5663, comm: pm-hibernate Not tainted 3.1.0-rc1-wl+ #19 Call Trace: [] warn_slowpath_common+0x6d/0xa0 [] ? debug_print_object+0x85/0xa0 [] ? debug_print_object+0x85/0xa0 [] warn_slowpath_fmt+0x2e/0x30 [] debug_print_object+0x85/0xa0 [] ? sta_info_alloc+0x1a0/0x1a0 [mac80211] [] debug_check_no_obj_freed+0xe2/0x180 [] kfree+0x8b/0x150 [] cfg80211_dev_free+0x7e/0x90 [cfg80211] [] wiphy_dev_release+0xd/0x10 [cfg80211] [] device_release+0x19/0x80 [] kobject_release+0x7a/0x1c0 [] ? rtnl_unlock+0x8/0x10 [] ? wiphy_resume+0x6b/0x80 [cfg80211] [] ? kobject_del+0x30/0x30 [] kref_put+0x2d/0x60 [] kobject_put+0x1d/0x50 [] ? mutex_lock+0x14/0x40 [] put_device+0xf/0x20 [] dpm_resume+0xca/0x160 [] hibernation_snapshot+0xcd/0x260 [] ? freeze_processes+0x3f/0x90 [] hibernate+0xcb/0x1e0 [] ? pm_async_store+0x40/0x40 [] state_store+0xa0/0xb0 [] ? pm_async_store+0x40/0x40 [] kobj_attr_store+0x20/0x30 [] sysfs_write_file+0x94/0xf0 [] vfs_write+0x9a/0x160 [] ? sysfs_open_file+0x200/0x200 [] sys_write+0x3d/0x70 [] sysenter_do_call+0x12/0x28 Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Signed-off-by: John W. Linville --- include/net/cfg80211.h | 3 +++ net/mac80211/main.c | 2 +- net/wireless/core.c | 7 +++++++ net/wireless/sysfs.c | 6 ++++-- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d17f47fc9e31..408ae4882d22 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1865,6 +1865,9 @@ struct wiphy { * you need use set_wiphy_dev() (see below) */ struct device dev; + /* protects ->resume, ->suspend sysfs callbacks against unregister hw */ + bool registered; + /* dir in debugfs: ieee80211/ */ struct dentry *debugfsdir; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 866f269183cf..acb44230b251 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -1012,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) cancel_work_sync(&local->reconfig_filter); ieee80211_clear_tx_pending(local); - sta_info_stop(local); rate_control_deinitialize(local); if (skb_queue_len(&local->skb_queue) || @@ -1024,6 +1023,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); + sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); diff --git a/net/wireless/core.c b/net/wireless/core.c index 645437cfc464..c14865172da7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -616,6 +616,9 @@ int wiphy_register(struct wiphy *wiphy) if (res) goto out_rm_dev; + rtnl_lock(); + rdev->wiphy.registered = true; + rtnl_unlock(); return 0; out_rm_dev: @@ -647,6 +650,10 @@ void wiphy_unregister(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + rtnl_lock(); + rdev->wiphy.registered = false; + rtnl_unlock(); + rfkill_unregister(rdev->rfkill); /* protect the device list */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index c6e4ca6a7d2e..ff574597a854 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -93,7 +93,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) if (rdev->ops->suspend) { rtnl_lock(); - ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); + if (rdev->wiphy.registered) + ret = rdev->ops->suspend(&rdev->wiphy, rdev->wowlan); rtnl_unlock(); } @@ -112,7 +113,8 @@ static int wiphy_resume(struct device *dev) if (rdev->ops->resume) { rtnl_lock(); - ret = rdev->ops->resume(&rdev->wiphy); + if (rdev->wiphy.registered) + ret = rdev->ops->resume(&rdev->wiphy); rtnl_unlock(); } -- cgit v1.2.3 From 543cc38c8fe86deba4169977c61eb88491036837 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Fri, 12 Aug 2011 14:02:04 +0200 Subject: rt2x00: do not drop usb dev reference counter on suspend When hibernating ->resume may not be called by usb core, but disconnect and probe instead, so we do not increase the counter after decreasing it in ->supend. As a result we free memory early, and get crash when unplugging usb dongle. BUG: unable to handle kernel paging request at 6b6b6b9f IP: [] driver_sysfs_remove+0x10/0x30 *pdpt = 0000000034f21001 *pde = 0000000000000000 Pid: 20, comm: khubd Not tainted 3.1.0-rc1-wl+ #20 LENOVO 6369CTO/6369CTO EIP: 0060:[] EFLAGS: 00010202 CPU: 1 EIP is at driver_sysfs_remove+0x10/0x30 EAX: 6b6b6b6b EBX: f52bba34 ECX: 00000000 EDX: 6b6b6b6b ESI: 6b6b6b6b EDI: c0a0ea20 EBP: f61c9e68 ESP: f61c9e64 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process khubd (pid: 20, ti=f61c8000 task=f6138270 task.ti=f61c8000) Call Trace: [] __device_release_driver+0x1f/0xa0 [] device_release_driver+0x20/0x40 [] bus_remove_device+0x84/0xe0 [] ? device_remove_attrs+0x2a/0x80 [] device_del+0xe7/0x170 [] usb_disconnect+0xd4/0x180 [] hub_thread+0x691/0x1600 [] ? wake_up_bit+0x30/0x30 [] ? complete+0x49/0x60 [] ? hub_disconnect+0xd0/0xd0 [] ? hub_disconnect+0xd0/0xd0 [] kthread+0x74/0x80 [] ? kthread_worker_fn+0x150/0x150 [] kernel_thread_helper+0x6/0x10 Cc: stable@kernel.org Signed-off-by: Stanislaw Gruszka Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index 7fbb55c9da82..1e31050dafc9 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -871,18 +871,8 @@ int rt2x00usb_suspend(struct usb_interface *usb_intf, pm_message_t state) { struct ieee80211_hw *hw = usb_get_intfdata(usb_intf); struct rt2x00_dev *rt2x00dev = hw->priv; - int retval; - - retval = rt2x00lib_suspend(rt2x00dev, state); - if (retval) - return retval; - /* - * Decrease usbdev refcount. - */ - usb_put_dev(interface_to_usbdev(usb_intf)); - - return 0; + return rt2x00lib_suspend(rt2x00dev, state); } EXPORT_SYMBOL_GPL(rt2x00usb_suspend); @@ -891,8 +881,6 @@ int rt2x00usb_resume(struct usb_interface *usb_intf) struct ieee80211_hw *hw = usb_get_intfdata(usb_intf); struct rt2x00_dev *rt2x00dev = hw->priv; - usb_get_dev(interface_to_usbdev(usb_intf)); - return rt2x00lib_resume(rt2x00dev); } EXPORT_SYMBOL_GPL(rt2x00usb_resume); -- cgit v1.2.3 From b503c7a273c0a3018ad11ea8c513c639120afbf4 Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Fri, 19 Aug 2011 18:43:06 +0530 Subject: ath9k_hw: Fix STA (AR9485) bringup issue due to incorrect MAC address Due to some recent optimization done in the way the mac address bytes are written into the OTP memory, some AR9485 chipsets were forced to use the first byte from the eeprom template and the remaining bytes are read from OTP. AR9485 happens to use generic eeprom template which has 0x1 as the first byte causes issues in bringing up the card. So fixed the eeprom template accordingly to address the issue. Cc: stable@kernel.org Cc: Paul Stewart Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/ar9003_eeprom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index c34bef1bf2b0..1b9400371eaf 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -69,7 +69,7 @@ static int ar9003_hw_power_interpolate(int32_t x, static const struct ar9300_eeprom ar9300_default = { .eepromVersion = 2, .templateVersion = 2, - .macAddr = {1, 2, 3, 4, 5, 6}, + .macAddr = {0, 2, 3, 4, 5, 6}, .custData = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, .baseEepHeader = { -- cgit v1.2.3 From 886b66ef2f2d4984f6c72d86a9d8a3ffe4344fa5 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 19 Aug 2011 22:14:47 +0200 Subject: bcma: add uevent to the bus, to autoload drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: David Woodhouse Acked-by: Rafał Miłecki Signed-off-by: John W. Linville --- drivers/bcma/main.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 873e2e4ac55f..73b7b1a18fab 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -15,6 +15,7 @@ MODULE_LICENSE("GPL"); static int bcma_bus_match(struct device *dev, struct device_driver *drv); static int bcma_device_probe(struct device *dev); static int bcma_device_remove(struct device *dev); +static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env); static ssize_t manuf_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -49,6 +50,7 @@ static struct bus_type bcma_bus_type = { .match = bcma_bus_match, .probe = bcma_device_probe, .remove = bcma_device_remove, + .uevent = bcma_device_uevent, .dev_attrs = bcma_device_attrs, }; @@ -227,6 +229,16 @@ static int bcma_device_remove(struct device *dev) return 0; } +static int bcma_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct bcma_device *core = container_of(dev, struct bcma_device, dev); + + return add_uevent_var(env, + "MODALIAS=bcma:m%04Xid%04Xrev%02Xcl%02X", + core->id.manuf, core->id.id, + core->id.rev, core->id.class); +} + static int __init bcma_modinit(void) { int err; -- cgit v1.2.3 From d5c073caf050bc713271a02e016b1672d9b7b935 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Mon, 22 Aug 2011 11:28:57 -0700 Subject: net: Documentation: RFC 2553bis is now RFC 3493 Signed-off-by: Geoffrey Thomas Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index db2a4067013c..81546990f41c 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -992,7 +992,7 @@ bindv6only - BOOLEAN TRUE: disable IPv4-mapped address feature FALSE: enable IPv4-mapped address feature - Default: FALSE (as specified in RFC2553bis) + Default: FALSE (as specified in RFC3493) IPv6 Fragmentation: -- cgit v1.2.3 From fcb8ce5cfe30ca9ca5c9a79cdfe26d1993e65e0c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 22 Aug 2011 11:42:53 -0700 Subject: Linux 3.1-rc3 --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 3241d41dfbff..788511f86a62 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ VERSION = 3 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc2 -NAME = Wet Seal +EXTRAVERSION = -rc3 +NAME = "Divemaster Edition" # *DOCUMENTATION* # To see a list of typical targets execute "make help" -- cgit v1.2.3 From 052605c6caa3e1edf8eee8fe5fe6d53f5721f39a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 26 Jul 2011 17:48:43 -0700 Subject: target: Make standard INQUIRY return 'not connected' for tpg_virt_lun0 This patch changes target_emulate_inquiry_std() to set the 'not connected' (0x35) bit in standard INQUIRY response data when we are processing a request to a virtual LUN=0 mapping from struct se_device *g_lun0_dev that have been setup for us in transport_lookup_cmd_lun(). This addresses an issue where qla2xxx FC clients need to be able to create demo-mode I_T FC Nexuses by default, but should not be exposing the default set of TPG LUNs to all FC clients. This includes adding an new optional target_core_fabric_ops->tpg_check_demo_mode_login_only() caller to allow demo_mode nexuses to skip the old default of bulding a demo-mode MappedLUNs list via core_tpg_add_node_to_devs(). (roland: Add missing tpg_check_demo_mode_login_only check in core_dev_add_lun) Reported-by: Roland Dreier Cc: Andrew Vasquez Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 11 ++++++++--- drivers/target/target_core_device.c | 4 +++- drivers/target/target_core_tpg.c | 12 ++++++++++-- include/target/target_core_fabric_ops.h | 6 ++++++ 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 8ae09a1bdf74..d095408dbd5f 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -67,6 +67,7 @@ target_emulate_inquiry_std(struct se_cmd *cmd) { struct se_lun *lun = cmd->se_lun; struct se_device *dev = cmd->se_dev; + struct se_portal_group *tpg = lun->lun_sep->sep_tpg; unsigned char *buf; /* @@ -81,9 +82,13 @@ target_emulate_inquiry_std(struct se_cmd *cmd) buf = transport_kmap_first_data_page(cmd); - buf[0] = dev->transport->get_device_type(dev); - if (buf[0] == TYPE_TAPE) - buf[1] = 0x80; + if (dev == tpg->tpg_virt_lun0.lun_se_dev) { + buf[0] = 0x3f; /* Not connected */ + } else { + buf[0] = dev->transport->get_device_type(dev); + if (buf[0] == TYPE_TAPE) + buf[1] = 0x80; + } buf[2] = dev->transport->get_device_rev(dev); /* diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index b38b6c993e65..ec3fbcda3e3c 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1346,7 +1346,9 @@ struct se_lun *core_dev_add_lun( struct se_node_acl *acl; spin_lock_bh(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { - if (acl->dynamic_node_acl) { + if (acl->dynamic_node_acl && + (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || + !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { spin_unlock_bh(&tpg->acl_node_lock); core_tpg_add_node_to_devs(acl, tpg); spin_lock_bh(&tpg->acl_node_lock); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 4f1ba4c5ef11..718ccd1348b1 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -298,8 +298,16 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( tpg->se_tpg_tfo->tpg_release_fabric_acl(tpg, acl); return NULL; } - - core_tpg_add_node_to_devs(acl, tpg); + /* + * Here we only create demo-mode MappedLUNs from the active + * TPG LUNs if the fabric is not explictly asking for + * tpg_check_demo_mode_login_only() == 1. + */ + if ((tpg->se_tpg_tfo->tpg_check_demo_mode_login_only != NULL) && + (tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg) == 1)) + do { ; } while (0); + else + core_tpg_add_node_to_devs(acl, tpg); spin_lock_bh(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); diff --git a/include/target/target_core_fabric_ops.h b/include/target/target_core_fabric_ops.h index 2de8fe907596..126c675f4f14 100644 --- a/include/target/target_core_fabric_ops.h +++ b/include/target/target_core_fabric_ops.h @@ -27,6 +27,12 @@ struct target_core_fabric_ops { int (*tpg_check_demo_mode_cache)(struct se_portal_group *); int (*tpg_check_demo_mode_write_protect)(struct se_portal_group *); int (*tpg_check_prod_mode_write_protect)(struct se_portal_group *); + /* + * Optionally used by fabrics to allow demo-mode login, but not + * expose any TPG LUNs, and return 'not connected' in standard + * inquiry response + */ + int (*tpg_check_demo_mode_login_only)(struct se_portal_group *); struct se_node_acl *(*tpg_alloc_fabric_acl)( struct se_portal_group *); void (*tpg_release_fabric_acl)(struct se_portal_group *, -- cgit v1.2.3 From e1750ba20f0d850c38820190ccbf0f647723091a Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Mon, 1 Aug 2011 23:58:18 +0200 Subject: target: Use ERR_CAST inlined function Use ERR_CAST inlined function instead of ERR_PTR(PTR_ERR(...)) The semantic patch that makes this output is available in scripts/coccinelle/api/err_cast.cocci. More information about semantic patching is available at http://coccinelle.lip6.fr/ Signed-off-by: Thomas Meyer Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_configfs.c | 4 ++-- drivers/target/target_core_fabric_configfs.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index f095e65b1ccf..f1643dbf6a92 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -268,7 +268,7 @@ struct se_tpg_np *lio_target_call_addnptotpg( ISCSI_TCP); if (IS_ERR(tpg_np)) { iscsit_put_tpg(tpg); - return ERR_PTR(PTR_ERR(tpg_np)); + return ERR_CAST(tpg_np); } pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n"); @@ -1285,7 +1285,7 @@ struct se_wwn *lio_target_call_coreaddtiqn( tiqn = iscsit_add_tiqn((unsigned char *)name); if (IS_ERR(tiqn)) - return ERR_PTR(PTR_ERR(tiqn)); + return ERR_CAST(tiqn); /* * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group. */ diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c index f1654694f4ea..55bbe0847a6d 100644 --- a/drivers/target/target_core_fabric_configfs.c +++ b/drivers/target/target_core_fabric_configfs.c @@ -481,7 +481,7 @@ static struct config_group *target_fabric_make_nodeacl( se_nacl = tf->tf_ops.fabric_make_nodeacl(se_tpg, group, name); if (IS_ERR(se_nacl)) - return ERR_PTR(PTR_ERR(se_nacl)); + return ERR_CAST(se_nacl); nacl_cg = &se_nacl->acl_group; nacl_cg->default_groups = se_nacl->acl_default_groups; -- cgit v1.2.3 From 9be08c5804ae4ad96ec22d0b1e71e630803a85ea Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Tue, 2 Aug 2011 10:26:36 +0200 Subject: iscsi-target: Fix leak on failure in iscsi_copy_param_list() We leak memory if the allocations for 'new_param->name' or 'new_param->value' fail in iscsi_target_parameters.c::iscsi_copy_param_list() We also do a lot of variable assignments that are completely pointless if the allocations fail. So, let's move the allocations before the assignments and also make sure that we free whatever was allocated to one if the allocation fail. There's also some small CodingStyle fixups in there (curly braces on both branches of if statement, only one variable per line) since I was in the area anyway. And finally, error messages in the function are put on a single line for easy grep'abillity. Signed-off-by: Jesper Juhl Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_parameters.c | 43 ++++++++++---------------- 1 file changed, 16 insertions(+), 27 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c index 252e246cf51e..497b2e718a76 100644 --- a/drivers/target/iscsi/iscsi_target_parameters.c +++ b/drivers/target/iscsi/iscsi_target_parameters.c @@ -545,13 +545,13 @@ int iscsi_copy_param_list( struct iscsi_param_list *src_param_list, int leading) { - struct iscsi_param *new_param = NULL, *param = NULL; + struct iscsi_param *param = NULL; + struct iscsi_param *new_param = NULL; struct iscsi_param_list *param_list = NULL; param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); if (!param_list) { - pr_err("Unable to allocate memory for" - " struct iscsi_param_list.\n"); + pr_err("Unable to allocate memory for struct iscsi_param_list.\n"); goto err_out; } INIT_LIST_HEAD(¶m_list->param_list); @@ -567,8 +567,17 @@ int iscsi_copy_param_list( new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); if (!new_param) { - pr_err("Unable to allocate memory for" - " struct iscsi_param.\n"); + pr_err("Unable to allocate memory for struct iscsi_param.\n"); + goto err_out; + } + + new_param->name = kstrdup(param->name, GFP_KERNEL); + new_param->value = kstrdup(param->value, GFP_KERNEL); + if (!new_param->value || !new_param->name) { + kfree(new_param->value); + kfree(new_param->name); + kfree(new_param); + pr_err("Unable to allocate memory for parameter name/value.\n"); goto err_out; } @@ -580,32 +589,12 @@ int iscsi_copy_param_list( new_param->use = param->use; new_param->type_range = param->type_range; - new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL); - if (!new_param->name) { - pr_err("Unable to allocate memory for" - " parameter name.\n"); - goto err_out; - } - - new_param->value = kzalloc(strlen(param->value) + 1, - GFP_KERNEL); - if (!new_param->value) { - pr_err("Unable to allocate memory for" - " parameter value.\n"); - goto err_out; - } - - memcpy(new_param->name, param->name, strlen(param->name)); - new_param->name[strlen(param->name)] = '\0'; - memcpy(new_param->value, param->value, strlen(param->value)); - new_param->value[strlen(param->value)] = '\0'; - list_add_tail(&new_param->p_list, ¶m_list->param_list); } - if (!list_empty(¶m_list->param_list)) + if (!list_empty(¶m_list->param_list)) { *dst_param_list = param_list; - else { + } else { pr_err("No parameters allocated.\n"); goto err_out; } -- cgit v1.2.3 From 6fc6148865c9a17cee33f251723f6a056f022ecd Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 2 Aug 2011 12:35:02 +0200 Subject: target: Convert target_core_rd.c to use use BUG_ON Use BUG_ON(x) rather than if(x) BUG(); The semantic patch that fixes this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; @@ -if (x) BUG(); +BUG_ON(x); @@ identifier x; @@ -if (!x) BUG(); +BUG_ON(!x); // Signed-off-by: Julia Lawall Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_rd.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 3dd81d24d9a9..e567e129c697 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -390,12 +390,10 @@ static int rd_MEMCPY_read(struct rd_request *req) length = req->rd_size; dst = sg_virt(&sg_d[i++]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); src = sg_virt(&sg_s[j]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); dst_offset = 0; src_offset = length; @@ -415,8 +413,7 @@ static int rd_MEMCPY_read(struct rd_request *req) length = req->rd_size; dst = sg_virt(&sg_d[i]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); if (sg_d[i].length == length) { i++; @@ -425,8 +422,7 @@ static int rd_MEMCPY_read(struct rd_request *req) dst_offset = length; src = sg_virt(&sg_s[j++]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); src_offset = 0; page_end = 1; @@ -510,12 +506,10 @@ static int rd_MEMCPY_write(struct rd_request *req) length = req->rd_size; src = sg_virt(&sg_s[i++]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); dst = sg_virt(&sg_d[j]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); src_offset = 0; dst_offset = length; @@ -535,8 +529,7 @@ static int rd_MEMCPY_write(struct rd_request *req) length = req->rd_size; src = sg_virt(&sg_s[i]) + src_offset; - if (!src) - BUG(); + BUG_ON(!src); if (sg_s[i].length == length) { i++; @@ -545,8 +538,7 @@ static int rd_MEMCPY_write(struct rd_request *req) src_offset = length; dst = sg_virt(&sg_d[j++]) + dst_offset; - if (!dst) - BUG(); + BUG_ON(!dst); dst_offset = 0; page_end = 1; -- cgit v1.2.3 From c2337c709102b343bd917ae00c79b266fb15b871 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Aug 2011 14:02:27 -0700 Subject: iscsi-target: remove duplicate return We returned on the line before already. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index c24fb10de60b..6a4ea29c2f36 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -2243,7 +2243,6 @@ static int iscsit_handle_snack( case 0: return iscsit_handle_recovery_datain_or_r2t(conn, buf, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); - return 0; case ISCSI_FLAG_SNACK_TYPE_STATUS: return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength); -- cgit v1.2.3 From 387e96c05299ca7a0ade874f343f91f0b01086a0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 8 Aug 2011 14:06:44 -0700 Subject: iscsi-target: forever loop bug in iscsit_attach_ooo_cmdsn() This patch fixes a forever loop bug in iscsit_attach_ooo_cmdsn() while walking sess->sess_ooo_cmdsn_list when the received CmdSN is less than the tail of the list. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_erl1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c index 980650792cf6..c4c68da3e500 100644 --- a/drivers/target/iscsi/iscsi_target_erl1.c +++ b/drivers/target/iscsi/iscsi_target_erl1.c @@ -834,7 +834,7 @@ static int iscsit_attach_ooo_cmdsn( */ list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list, ooo_list) { - while (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) + if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) continue; list_add(&ooo_cmdsn->ooo_list, -- cgit v1.2.3 From 16ab8e60a0ebc22cfbe61d84e620457a15f3a0bc Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 8 Aug 2011 19:03:38 -0700 Subject: target: Fix write payload exception handling with ->new_cmd_map This patch fixes a bug for fabrics using tfo->new_cmd_map() that are expect transport_generic_request_failure() to be calling transport_send_check_condition_and_sense() for both READ and WRITE, instead of only for READ exceptions. This was originally observed with a failed WRITE_SAME_16 w/ unmap=0 using tcm_loop. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index cc5a339d4d5a..fd7d4518b8ef 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2053,8 +2053,14 @@ static void transport_generic_request_failure( cmd->scsi_sense_reason = TCM_UNSUPPORTED_SCSI_OPCODE; break; } - - if (!sc) + /* + * If a fabric does not define a cmd->se_tfo->new_cmd_map caller, + * make the call to transport_send_check_condition_and_sense() + * directly. Otherwise expect the fabric to make the call to + * transport_send_check_condition_and_sense() after handling + * possible unsoliticied write data payloads. + */ + if (!sc && !cmd->se_tfo->new_cmd_map) transport_new_cmd_failure(cmd); else { ret = transport_send_check_condition_and_sense(cmd, -- cgit v1.2.3 From 706d5860969b3b24d65d9a57bd3bb5e4a1419c08 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Thu, 28 Jul 2011 00:07:03 -0700 Subject: target: Add WRITE_SAME (10) parsing and refactor passthrough checks This patch adds initial WRITE_SAME (10) w/ UNMAP=1 support following updates in sbcr26 to allow UNMAP=1 for the non 16 + 32 byte CDB case. It also refactors current pSCSI passthrough passthrough checks into target_check_write_same_discard() ahead of UNMAP=0 w/ write payload support into target_core_iblock.c. This includes the support for handling WRITE_SAME in transport_emulate_control_cdb(), and converts target_emulate_write_same to accept num_blocks directly for WRITE_SAME, WRITE_SAME_16 and WRITE_SAME_32. Reported-by: Eric Seppanen Cc: Roland Dreier Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 28 +++++---- drivers/target/target_core_transport.c | 102 ++++++++++++++++++--------------- 2 files changed, 73 insertions(+), 57 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index d095408dbd5f..4c1d3a98e894 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1090,24 +1090,17 @@ err: * Note this is not used for TCM/pSCSI passthrough */ static int -target_emulate_write_same(struct se_task *task, int write_same32) +target_emulate_write_same(struct se_task *task, u32 num_blocks) { struct se_cmd *cmd = task->task_se_cmd; struct se_device *dev = cmd->se_dev; sector_t range; sector_t lba = cmd->t_task_lba; - unsigned int num_blocks; int ret; /* - * Extract num_blocks from the WRITE_SAME_* CDB. Then use the explict - * range when non zero is supplied, otherwise calculate the remaining - * range based on ->get_blocks() - starting LBA. + * Use the explicit range when non zero is supplied, otherwise calculate + * the remaining range based on ->get_blocks() - starting LBA. */ - if (write_same32) - num_blocks = get_unaligned_be32(&cmd->t_task_cdb[28]); - else - num_blocks = get_unaligned_be32(&cmd->t_task_cdb[10]); - if (num_blocks != 0) range = num_blocks; else @@ -1170,13 +1163,23 @@ transport_emulate_control_cdb(struct se_task *task) } ret = target_emulate_unmap(task); break; + case WRITE_SAME: + if (!dev->transport->do_discard) { + pr_err("WRITE_SAME emulation not supported" + " for: %s\n", dev->transport->name); + return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; + } + ret = target_emulate_write_same(task, + get_unaligned_be16(&cmd->t_task_cdb[7])); + break; case WRITE_SAME_16: if (!dev->transport->do_discard) { pr_err("WRITE_SAME_16 emulation not supported" " for: %s\n", dev->transport->name); return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; } - ret = target_emulate_write_same(task, 0); + ret = target_emulate_write_same(task, + get_unaligned_be32(&cmd->t_task_cdb[10])); break; case VARIABLE_LENGTH_CMD: service_action = @@ -1189,7 +1192,8 @@ transport_emulate_control_cdb(struct se_task *task) dev->transport->name); return PYX_TRANSPORT_UNKNOWN_SAM_OPCODE; } - ret = target_emulate_write_same(task, 1); + ret = target_emulate_write_same(task, + get_unaligned_be32(&cmd->t_task_cdb[28])); break; default: pr_err("Unsupported VARIABLE_LENGTH_CMD SA:" diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index fd7d4518b8ef..eb8055aa6e61 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2861,6 +2861,38 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd) return sectors; } +static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev) +{ + /* + * Determine if the received WRITE_SAME is used to for direct + * passthrough into Linux/SCSI with struct request via TCM/pSCSI + * or we are signaling the use of internal WRITE_SAME + UNMAP=1 + * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK code. + */ + int passthrough = (dev->transport->transport_type == + TRANSPORT_PLUGIN_PHBA_PDEV); + + if (!passthrough) { + if ((flags[0] & 0x04) || (flags[0] & 0x02)) { + pr_err("WRITE_SAME PBDATA and LBDATA" + " bits not supported for Block Discard" + " Emulation\n"); + return -ENOSYS; + } + /* + * Currently for the emulated case we only accept + * tpws with the UNMAP=1 bit set. + */ + if (!(flags[0] & 0x08)) { + pr_err("WRITE_SAME w/o UNMAP bit not" + " supported for Block Discard Emulation\n"); + return -ENOSYS; + } + } + + return 0; +} + /* transport_generic_cmd_sequencer(): * * Generic Command Sequencer that should work for most DAS transport @@ -3081,27 +3113,9 @@ static int transport_generic_cmd_sequencer( cmd->t_task_lba = get_unaligned_be64(&cdb[12]); cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; - /* - * Skip the remaining assignments for TCM/PSCSI passthrough - */ - if (passthrough) - break; - - if ((cdb[10] & 0x04) || (cdb[10] & 0x02)) { - pr_err("WRITE_SAME PBDATA and LBDATA" - " bits not supported for Block Discard" - " Emulation\n"); + if (target_check_write_same_discard(&cdb[10], dev) < 0) goto out_invalid_cdb_field; - } - /* - * Currently for the emulated case we only accept - * tpws with the UNMAP=1 bit set. - */ - if (!(cdb[10] & 0x08)) { - pr_err("WRITE_SAME w/o UNMAP bit not" - " supported for Block Discard Emulation\n"); - goto out_invalid_cdb_field; - } + break; default: pr_err("VARIABLE_LENGTH_CMD service action" @@ -3358,33 +3372,31 @@ static int transport_generic_cmd_sequencer( } cmd->t_task_lba = get_unaligned_be64(&cdb[2]); - passthrough = (dev->transport->transport_type == - TRANSPORT_PLUGIN_PHBA_PDEV); - /* - * Determine if the received WRITE_SAME_16 is used to for direct - * passthrough into Linux/SCSI with struct request via TCM/pSCSI - * or we are signaling the use of internal WRITE_SAME + UNMAP=1 - * emulation for -> Linux/BLOCK disbard with TCM/IBLOCK and - * TCM/FILEIO subsystem plugin backstores. - */ - if (!passthrough) { - if ((cdb[1] & 0x04) || (cdb[1] & 0x02)) { - pr_err("WRITE_SAME PBDATA and LBDATA" - " bits not supported for Block Discard" - " Emulation\n"); - goto out_invalid_cdb_field; - } - /* - * Currently for the emulated case we only accept - * tpws with the UNMAP=1 bit set. - */ - if (!(cdb[1] & 0x08)) { - pr_err("WRITE_SAME w/o UNMAP bit not " - " supported for Block Discard Emulation\n"); - goto out_invalid_cdb_field; - } + cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; + + if (target_check_write_same_discard(&cdb[1], dev) < 0) + goto out_invalid_cdb_field; + break; + case WRITE_SAME: + sectors = transport_get_sectors_10(cdb, cmd, §or_ret); + if (sector_ret) + goto out_unsupported_cdb; + + if (sectors) + size = transport_get_size(sectors, cdb, cmd); + else { + pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); + goto out_invalid_cdb_field; } + + cmd->t_task_lba = get_unaligned_be32(&cdb[2]); cmd->se_cmd_flags |= SCF_SCSI_CONTROL_SG_IO_CDB; + /* + * Follow sbcr26 with WRITE_SAME (10) and check for the existence + * of byte 1 bit 3 UNMAP instead of original reserved field + */ + if (target_check_write_same_discard(&cdb[1], dev) < 0) + goto out_invalid_cdb_field; break; case ALLOW_MEDIUM_REMOVAL: case GPCMD_CLOSE_TRACK: -- cgit v1.2.3 From 12850626e2717f866a94e6ced724e3efe5a0aab8 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Mon, 8 Aug 2011 19:08:23 -0700 Subject: target: Fix WRITE_SAME usage with transport_get_size For all flavours of WRITE_SAME, we only expect to handle a single block of data-out buffer payload, regardless of the number of logical blocks presented in the CDB. This patch changes all flavours of WRITE_SAME in transport_generic_cmd_sequencer() to pass '1' into transport_get_size() instead of the extracted 'sectors' to properly handle the default usage of sg_write_same without the --xferlen parameter. Reported-by: Eric Seppanen Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index eb8055aa6e61..d35c2cc779e9 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3103,7 +3103,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not" " supported\n"); @@ -3365,7 +3365,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); goto out_invalid_cdb_field; @@ -3383,7 +3383,7 @@ static int transport_generic_cmd_sequencer( goto out_unsupported_cdb; if (sectors) - size = transport_get_size(sectors, cdb, cmd); + size = transport_get_size(1, cdb, cmd); else { pr_err("WSNZ=1, WRITE_SAME w/sectors=0 not supported\n"); goto out_invalid_cdb_field; -- cgit v1.2.3 From 72f4ba1e32a1e5da31dcf14ea4b8985ae88a8bdb Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Tue, 9 Aug 2011 22:53:02 -0700 Subject: target: Remove duplicate task completions in transport_emulate_control_cdb This patch removes a duplicate set of transport_complete_task() calls in target_emulate_unmap() and target_emulate_write_same() as the completion call is already done within transport_emulate_control_cdb() This patch also adds a check in transport_emulate_control_cdb() for the existing SCF_EMULATE_CDB_ASYNC flag currently used by SYNCHRONIZE_CACHE in order to handle IMMEDIATE processing. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 4c1d3a98e894..40ad142f7cb3 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -1077,8 +1077,6 @@ target_emulate_unmap(struct se_task *task) size -= 16; } - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); err: transport_kunmap_first_data_page(cmd); @@ -1115,8 +1113,6 @@ target_emulate_write_same(struct se_task *task, u32 num_blocks) return ret; } - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); return 0; } @@ -1228,8 +1224,14 @@ transport_emulate_control_cdb(struct se_task *task) if (ret < 0) return ret; - task->task_scsi_status = GOOD; - transport_complete_task(task, 1); + /* + * Handle the successful completion here unless a caller + * has explictly requested an asychronous completion. + */ + if (!(cmd->se_cmd_flags & SCF_EMULATE_CDB_ASYNC)) { + task->task_scsi_status = GOOD; + transport_complete_task(task, 1); + } return PYX_TRANSPORT_SENT_TO_TRANSPORT; } -- cgit v1.2.3 From 7abbe7f3e4243e28a9169ee1b8d76f10a6f5d37c Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 10 Aug 2011 18:41:14 -0700 Subject: target: Fix SYNCHRONIZE_CACHE zero LBA + range breakage This patch fixes a SYNCHRONIZE_CACHE CDB handling bug with IBLOCK/FILEIO backends where transport_cmd_get_valid_sectors() was incorrectly rejecting a zero LBA + range CDB from being processed, and returning CHECK_CONDITION. This includes changing transport_cmd_get_valid_sectors() to return '0' on success and '-EINVAL' on failure (this makes more sense than sectors), and to only check transport_cmd_get_valid_sectors() when a non zero LBA + range SYNCHRONIZE_CACHE operation has been receieved for the non passthrough case. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d35c2cc779e9..d385c317a7a4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2853,12 +2853,10 @@ static int transport_cmd_get_valid_sectors(struct se_cmd *cmd) " transport_dev_end_lba(): %llu\n", cmd->t_task_lba, sectors, transport_dev_end_lba(dev)); - pr_err(" We should return CHECK_CONDITION" - " but we don't yet\n"); - return 0; + return -EINVAL; } - return sectors; + return 0; } static int target_check_write_same_discard(unsigned char *flags, struct se_device *dev) @@ -3350,10 +3348,12 @@ static int transport_generic_cmd_sequencer( cmd->se_cmd_flags |= SCF_EMULATE_CDB_ASYNC; /* * Check to ensure that LBA + Range does not exceed past end of - * device. + * device for IBLOCK and FILEIO ->do_sync_cache() backend calls */ - if (!transport_cmd_get_valid_sectors(cmd)) - goto out_invalid_cdb_field; + if ((cmd->t_task_lba != 0) || (sectors != 0)) { + if (transport_cmd_get_valid_sectors(cmd) < 0) + goto out_invalid_cdb_field; + } break; case UNMAP: size = get_unaligned_be16(&cdb[7]); -- cgit v1.2.3 From 01cde4d54327884a0b61ce8666092f5703557d4b Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Wed, 10 Aug 2011 00:59:58 -0700 Subject: target: Add missing DATA_SG_IO transport_cmd_get_valid_sectors check This patch adds the missing transport_cmd_get_valid_sectors() check for SCF_SCSI_DATA_SG_IO_CDB type payloads to ensure that a received LBA + range does not exeed past the end of associated backend struct se_device. This patch also fixes a bug in the failure path of transport_new_cmd_obj() where this check can fail, so change to use a signed 'rc' and return '-EINVAL' to signal proper transport_generic_request_failure() handling. Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index d385c317a7a4..ab61c5550852 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -3891,9 +3891,7 @@ EXPORT_SYMBOL(transport_generic_map_mem_to_cmd); static int transport_new_cmd_obj(struct se_cmd *cmd) { struct se_device *dev = cmd->se_dev; - u32 task_cdbs; - u32 rc; - int set_counts = 1; + int set_counts = 1, rc, task_cdbs; /* * Setup any BIDI READ tasks and memory from @@ -3911,7 +3909,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd) cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return PYX_TRANSPORT_LU_COMM_FAILURE; + return -EINVAL; } atomic_inc(&cmd->t_fe_count); atomic_inc(&cmd->t_se_count); @@ -3930,7 +3928,7 @@ static int transport_new_cmd_obj(struct se_cmd *cmd) cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return PYX_TRANSPORT_LU_COMM_FAILURE; + return -EINVAL; } if (set_counts) { @@ -4248,10 +4246,13 @@ static u32 transport_allocate_tasks( struct scatterlist *sgl, unsigned int sgl_nents) { - if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) + if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB) { + if (transport_cmd_get_valid_sectors(cmd) < 0) + return -EINVAL; + return transport_allocate_data_tasks(cmd, lba, data_direction, sgl, sgl_nents); - else + } else return transport_allocate_control_task(cmd); } -- cgit v1.2.3 From 525a48a21da259d00d6ebc5b60563b5bcf022c26 Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 13 Aug 2011 02:11:38 -0700 Subject: target: Fix task count > 1 handling breakage and use max_sector page alignment This patch addresses recent breakage with multiple se_task (task_count > 1) operation following backend dev->se_sub_dev->se_dev_attrib.max_sectors in new transport_allocate_data_tasks() code. The initial bug here was a bogus task->task_sg_nents assignment in transport_allocate_data_tasks() based on the passed parameter, which now uses DIV_ROUND_UP(task_size, PAGE_SIZE) to determine the proper number of per task SGL entries for the (task_count > 1) case. This also means we now need to enforce a PAGE_SIZE aligned max_sector count value for this to work as expected without bringing back the pre v3.1 transport_map_mem_to_sg() logic to handle SGL offsets across multiple tasks. So this patch adds se_dev_align_max_sectors() to round down max_sectors as necessary to ensure this alignment via se_dev_set_default_attribs() and se_dev_align_max_sectors() and keeps it simple for (task_count > 1) operation. So far this bugfix has been tested with (task_count > 1) operation using iscsi-target and iblock backends. Reported-by: Chris Boot Cc: Kiran Patil Cc: Andy Grover Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 28 ++++++++++++++++++++++++++++ drivers/target/target_core_transport.c | 7 +++++-- 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index ec3fbcda3e3c..4b5237f500a5 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -839,6 +839,24 @@ int se_dev_check_shutdown(struct se_device *dev) return ret; } +u32 se_dev_align_max_sectors(u32 max_sectors, u32 block_size) +{ + u32 tmp, aligned_max_sectors; + /* + * Limit max_sectors to a PAGE_SIZE aligned value for modern + * transport_allocate_data_tasks() operation. + */ + tmp = rounddown((max_sectors * block_size), PAGE_SIZE); + aligned_max_sectors = (tmp / block_size); + if (max_sectors != aligned_max_sectors) { + printk(KERN_INFO "Rounding down aligned max_sectors from %u" + " to %u\n", max_sectors, aligned_max_sectors); + return aligned_max_sectors; + } + + return max_sectors; +} + void se_dev_set_default_attribs( struct se_device *dev, struct se_dev_limits *dev_limits) @@ -878,6 +896,11 @@ void se_dev_set_default_attribs( * max_sectors is based on subsystem plugin dependent requirements. */ dev->se_sub_dev->se_dev_attrib.hw_max_sectors = limits->max_hw_sectors; + /* + * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() + */ + limits->max_sectors = se_dev_align_max_sectors(limits->max_sectors, + limits->logical_block_size); dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors; /* * Set optimal_sectors from max_sectors, which can be lowered via @@ -1242,6 +1265,11 @@ int se_dev_set_max_sectors(struct se_device *dev, u32 max_sectors) return -EINVAL; } } + /* + * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks() + */ + max_sectors = se_dev_align_max_sectors(max_sectors, + dev->se_sub_dev->se_dev_attrib.block_size); dev->se_sub_dev->se_dev_attrib.max_sectors = max_sectors; pr_debug("dev[%p]: SE Device max_sectors changed to %u\n", diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index ab61c5550852..efac6a9a7438 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4126,7 +4126,11 @@ static int transport_allocate_data_tasks( /* Update new cdb with updated lba/sectors */ cmd->transport_split_cdb(task->task_lba, task->task_sectors, cdb); - + /* + * This now assumes that passed sg_ents are in PAGE_SIZE chunks + * in order to calculate the number per task SGL entries + */ + task->task_sg_nents = DIV_ROUND_UP(task->task_size, PAGE_SIZE); /* * Check if the fabric module driver is requesting that all * struct se_task->task_sg[] be chained together.. If so, @@ -4136,7 +4140,6 @@ static int transport_allocate_data_tasks( * It's so much easier and only a waste when task_count > 1. * That is extremely rare. */ - task->task_sg_nents = sgl_nents; if (cmd->se_tfo->task_sg_chaining) { task->task_sg_nents++; task->task_padded_sg = 1; -- cgit v1.2.3 From c3c74c7a33d837be391ab61aaae39bb21f16736a Mon Sep 17 00:00:00 2001 From: Nicholas Bellinger Date: Sat, 13 Aug 2011 05:30:31 -0700 Subject: target: Fix task SGL chaining breakage with transport_allocate_data_tasks This patch fixes two bugs associated with transport_do_task_sg_chain() operation where transport_allocate_data_tasks() was incorrectly setting task_padded_sg for all tasks, and causing bogus task->task_sg_nents assignments + OOPsen with fabrics depending upon this code. The first bit here adds a task_sg_nents_padded check in transport_allocate_data_tasks() to include an extra SGL vector when necessary for tasks that expect to be linked using sg_chain(). The second change involves making transport_do_task_sg_chain() properly account for the extra SGL vector when task->task_padded_sg is set for the non trailing ->task_sg or single ->task_sg allocations. Note this patch also removes the BUG_ON(!task->task_padded_sg) check within transport_do_task_sg_chain() as we expect this to happen normally with the updated logic in transport_allocate_data_tasks(), along with being bogus for CONTROL_SG_IO_CDB type payloads. So far this bugfix has been tested with tcm_qla2xxx and iblock backends in (task_count > 1)( and (task_count == 1) operation. Reported-by: Kiran Patil Cc: Kiran Patil Cc: Andy Grover Cc: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index efac6a9a7438..9cc49d1b5b1f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4044,8 +4044,6 @@ void transport_do_task_sg_chain(struct se_cmd *cmd) if (!task->task_sg) continue; - BUG_ON(!task->task_padded_sg); - if (!sg_first) { sg_first = task->task_sg; chained_nents = task->task_sg_nents; @@ -4053,9 +4051,19 @@ void transport_do_task_sg_chain(struct se_cmd *cmd) sg_chain(sg_prev, sg_prev_nents, task->task_sg); chained_nents += task->task_sg_nents; } + /* + * For the padded tasks, use the extra SGL vector allocated + * in transport_allocate_data_tasks() for the sg_prev_nents + * offset into sg_chain() above.. The last task of a + * multi-task list, or a single task will not have + * task->task_sg_padded set.. + */ + if (task->task_padded_sg) + sg_prev_nents = (task->task_sg_nents + 1); + else + sg_prev_nents = task->task_sg_nents; sg_prev = task->task_sg; - sg_prev_nents = task->task_sg_nents; } /* * Setup the starting pointer and total t_tasks_sg_linked_no including @@ -4107,7 +4115,7 @@ static int transport_allocate_data_tasks( cmd_sg = sgl; for (i = 0; i < task_count; i++) { - unsigned int task_size; + unsigned int task_size, task_sg_nents_padded; int count; task = transport_generic_get_task(cmd, data_direction); @@ -4135,24 +4143,24 @@ static int transport_allocate_data_tasks( * Check if the fabric module driver is requesting that all * struct se_task->task_sg[] be chained together.. If so, * then allocate an extra padding SG entry for linking and - * marking the end of the chained SGL. - * Possibly over-allocate task sgl size by using cmd sgl size. - * It's so much easier and only a waste when task_count > 1. - * That is extremely rare. + * marking the end of the chained SGL for every task except + * the last one for (task_count > 1) operation, or skipping + * the extra padding for the (task_count == 1) case. */ - if (cmd->se_tfo->task_sg_chaining) { - task->task_sg_nents++; + if (cmd->se_tfo->task_sg_chaining && (i < (task_count - 1))) { + task_sg_nents_padded = (task->task_sg_nents + 1); task->task_padded_sg = 1; - } + } else + task_sg_nents_padded = task->task_sg_nents; task->task_sg = kmalloc(sizeof(struct scatterlist) * - task->task_sg_nents, GFP_KERNEL); + task_sg_nents_padded, GFP_KERNEL); if (!task->task_sg) { cmd->se_dev->transport->free_task(task); return -ENOMEM; } - sg_init_table(task->task_sg, task->task_sg_nents); + sg_init_table(task->task_sg, task_sg_nents_padded); task_size = task->task_size; -- cgit v1.2.3 From 6626a0572657a0945a7b9ccf4a6d6ad1750f9adc Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Sat, 13 Aug 2011 22:10:46 -0700 Subject: iscsi-target: Implement iSCSI target IPv6 address printing. The iSCSI target configfs code to print out an initiator's IPv6 address is not fully implemented. This patch uses snprintf() with the "%pI6c" format string to format the IPv6 address for display purposes. Signed-off-by: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_login.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index bcaf82f47037..daad362a93ce 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -1013,19 +1013,9 @@ static int __iscsi_target_login_thread(struct iscsi_np *np) ISCSI_LOGIN_STATUS_TARGET_ERROR); goto new_sess_out; } -#if 0 - if (!iscsi_ntop6((const unsigned char *) - &sock_in6.sin6_addr.in6_u, - (char *)&conn->ipv6_login_ip[0], - IPV6_ADDRESS_SPACE)) { - pr_err("iscsi_ntop6() failed\n"); - iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, - ISCSI_LOGIN_STATUS_TARGET_ERROR); - goto new_sess_out; - } -#else - pr_debug("Skipping iscsi_ntop6()\n"); -#endif + snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c", + &sock_in6.sin6_addr.in6_u); + conn->login_port = ntohs(sock_in6.sin6_port); } else { memset(&sock_in, 0, sizeof(struct sockaddr_in)); -- cgit v1.2.3 From ba7736696341ad4253125055c0c85aa9f42959a0 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 13 Aug 2011 22:35:00 -0700 Subject: iscsi-target: Fix iscsit_allocate_se_cmd_for_tmr failure path bugs This patch fixes two bugs in allocation failure handling in iscsit_allocate_se_cmd_for_tmr(): This first reported by DanC is a free-after call to transport_free_se_cmd(), this patch drops the transport_free_se_cmd() call all together, as iscsit_release_cmd() will release existing allocations as expected. The second is a bug where iscsi_cmd_t was being leaked on a cmd->tmr_req allocation failure, so make this jump to iscsit_release_cmd() as well. Signed-off-by: Dan Carpenter Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_util.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c index a1acb0167902..a0d23bc0fc98 100644 --- a/drivers/target/iscsi/iscsi_target_util.c +++ b/drivers/target/iscsi/iscsi_target_util.c @@ -243,7 +243,7 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( if (!cmd->tmr_req) { pr_err("Unable to allocate memory for" " Task Management command!\n"); - return NULL; + goto out; } /* * TASK_REASSIGN for ERL=2 / connection stays inside of @@ -298,8 +298,6 @@ struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( return cmd; out: iscsit_release_cmd(cmd); - if (se_cmd) - transport_free_se_cmd(se_cmd); return NULL; } -- cgit v1.2.3 From f15ea5780d08e4c96930c0d607d05e480ec588c8 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 10:01:24 -0700 Subject: target: Print subpage too for unhandled MODE SENSE pages Make a log message more useful by printing both the page and subpage that an initiator is requesting. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_cdb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c index 40ad142f7cb3..89ae923c5da6 100644 --- a/drivers/target/target_core_cdb.c +++ b/drivers/target/target_core_cdb.c @@ -920,8 +920,8 @@ target_emulate_modesense(struct se_cmd *cmd, int ten) length += target_modesense_control(dev, &buf[offset+length]); break; default: - pr_err("Got Unknown Mode Page: 0x%02x\n", - cdb[2] & 0x3f); + pr_err("MODE SENSE: unimplemented page/subpage: 0x%02x/0x%02x\n", + cdb[2] & 0x3f, cdb[3]); return PYX_TRANSPORT_UNKNOWN_MODE_PAGE; } offset += length; -- cgit v1.2.3 From 4e0f05297ff615a9a4e269da301ff77f660a3ab0 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 10:16:52 -0700 Subject: tcm_fc: init/exit functions should not be protected by "#ifdef MODULE" There's no need for the #ifdef protection when building into the kernel, and in fact we need the module_init() for the initialization function to be called. Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/tcm_fc/tfc_conf.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 8781d1e423df..520a8baae794 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -655,9 +655,7 @@ static void __exit ft_exit(void) synchronize_rcu(); } -#ifdef MODULE MODULE_DESCRIPTION("FC TCM fabric driver " FT_VERSION); MODULE_LICENSE("GPL"); module_init(ft_init); module_exit(ft_exit); -#endif /* MODULE */ -- cgit v1.2.3 From e63a8e1933a2218cf801e46dd01bd8cca4a555ec Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 12 Aug 2011 16:01:02 -0700 Subject: target: Make locking in transport_deregister_session() IRQ safe At least the tcm_qla2xxx fabric driver calls into transport_deregister_session() while holding an IRQ-disabled spinlock, so the inner locking needs to use spin_lock_irqsave() instead of spin_lock_bh(). This fixes warnings seen with tcm_qla2xxx like: WARNING: at kernel/softirq.c:159 local_bh_enable_ip+0x98/0xb0() Call Trace: [] warn_slowpath_common+0x7f/0xc0 [] warn_slowpath_null+0x1a/0x20 [] local_bh_enable_ip+0x98/0xb0 [] _raw_spin_unlock_bh+0x14/0x20 [] transport_deregister_session+0x96/0x180 [target_core_mod] [] tcm_qla2xxx_free_session+0xd1/0x170 [tcm_qla2xxx] [] qla_tgt_sess_put+0xc3/0x140 [qla2xxx] [] qla_tgt_stop_phase1+0x8f/0x2c0 [qla2xxx] [] tcm_qla2xxx_tpg_store_enable+0x6e/0xd0 [tcm_qla2xxx] [] target_fabric_tpg_attr_store+0x39/0x40 [target_core_mod] [] configfs_write_file+0xbd/0x120 [configfs] [] vfs_write+0xc6/0x180 [] sys_write+0x51/0x90 [] system_call_fastpath+0x16/0x1b Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 9cc49d1b5b1f..8d0c58ea6316 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -389,17 +389,18 @@ void transport_deregister_session(struct se_session *se_sess) { struct se_portal_group *se_tpg = se_sess->se_tpg; struct se_node_acl *se_nacl; + unsigned long flags; if (!se_tpg) { transport_free_session(se_sess); return; } - spin_lock_bh(&se_tpg->session_lock); + spin_lock_irqsave(&se_tpg->session_lock, flags); list_del(&se_sess->sess_list); se_sess->se_tpg = NULL; se_sess->fabric_sess_ptr = NULL; - spin_unlock_bh(&se_tpg->session_lock); + spin_unlock_irqrestore(&se_tpg->session_lock, flags); /* * Determine if we need to do extra work for this initiator node's @@ -407,22 +408,22 @@ void transport_deregister_session(struct se_session *se_sess) */ se_nacl = se_sess->se_node_acl; if (se_nacl) { - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irqsave(&se_tpg->acl_node_lock, flags); if (se_nacl->dynamic_node_acl) { if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache( se_tpg)) { list_del(&se_nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); core_tpg_wait_for_nacl_pr_ref(se_nacl); core_free_device_list_for_node(se_nacl, se_tpg); se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, se_nacl); - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irqsave(&se_tpg->acl_node_lock, flags); } } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags); } transport_free_session(se_sess); -- cgit v1.2.3 From 28638887f351d11867562322b7abaa014dd5528a Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 16 Aug 2011 09:40:01 -0700 Subject: target: Convert acl_node_lock to be IRQ-disabling With qla2xxx, acl_node_lock is taken inside qla2xxx's hardware_lock, which is taken in hardirq context. This means acl_node_lock must become an IRQ-disabling lock; in particular this fixes lockdep warnings along the lines of ====================================================== [ INFO: HARDIRQ-safe -> HARDIRQ-unsafe lock order detected ] (&(&se_tpg->acl_node_lock)->rlock){+.....}, at: [] transport_deregister_session+0x92/0x140 [target_core_mod] and this task is already holding: (&(&ha->hardware_lock)->rlock){-.-...}, at: [] qla_tgt_stop_phase1+0x57/0x2c0 [qla2xxx] which would create a new lock dependency: (&(&ha->hardware_lock)->rlock){-.-...} -> (&(&se_tpg->acl_node_lock)->rlock){+.....} but this new dependency connects a HARDIRQ-irq-safe lock: (&(&ha->hardware_lock)->rlock){-.-...} to a HARDIRQ-irq-unsafe lock: (&(&se_tpg->acl_node_lock)->rlock){+.....} Signed-off-by: Roland Dreier Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 16 ++++++------ drivers/target/target_core_pr.c | 8 +++--- drivers/target/target_core_tpg.c | 52 ++++++++++++++++++------------------- drivers/target/tcm_fc/tfc_conf.c | 4 +-- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 4b5237f500a5..ca6e4a4df134 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -472,9 +472,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) struct se_dev_entry *deve; u32 i; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(nacl, &tpg->acl_node_list, acl_list) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_irq(&nacl->device_list_lock); for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) { @@ -491,9 +491,9 @@ void core_clear_lun_from_tpg(struct se_lun *lun, struct se_portal_group *tpg) } spin_unlock_irq(&nacl->device_list_lock); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); } static struct se_port *core_alloc_port(struct se_device *dev) @@ -1372,17 +1372,17 @@ struct se_lun *core_dev_add_lun( */ if (tpg->se_tpg_tfo->tpg_check_demo_mode(tpg)) { struct se_node_acl *acl; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (acl->dynamic_node_acl && (!tpg->se_tpg_tfo->tpg_check_demo_mode_login_only || !tpg->se_tpg_tfo->tpg_check_demo_mode_login_only(tpg))) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); core_tpg_add_node_to_devs(acl, tpg); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); } } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); } return lun_p; diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 1c1b849cd4fb..7fd3a161f7cc 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1598,14 +1598,14 @@ static int core_scsi3_decode_spec_i_port( * from the decoded fabric module specific TransportID * at *i_str. */ - spin_lock_bh(&tmp_tpg->acl_node_lock); + spin_lock_irq(&tmp_tpg->acl_node_lock); dest_node_acl = __core_tpg_get_initiator_node_acl( tmp_tpg, i_str); if (dest_node_acl) { atomic_inc(&dest_node_acl->acl_pr_ref_count); smp_mb__after_atomic_inc(); } - spin_unlock_bh(&tmp_tpg->acl_node_lock); + spin_unlock_irq(&tmp_tpg->acl_node_lock); if (!dest_node_acl) { core_scsi3_tpg_undepend_item(tmp_tpg); @@ -3496,14 +3496,14 @@ after_iport_check: /* * Locate the destination struct se_node_acl from the received Transport ID */ - spin_lock_bh(&dest_se_tpg->acl_node_lock); + spin_lock_irq(&dest_se_tpg->acl_node_lock); dest_node_acl = __core_tpg_get_initiator_node_acl(dest_se_tpg, initiator_str); if (dest_node_acl) { atomic_inc(&dest_node_acl->acl_pr_ref_count); smp_mb__after_atomic_inc(); } - spin_unlock_bh(&dest_se_tpg->acl_node_lock); + spin_unlock_irq(&dest_se_tpg->acl_node_lock); if (!dest_node_acl) { pr_err("Unable to locate %s dest_node_acl for" diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index 718ccd1348b1..162b736c7342 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -137,15 +137,15 @@ struct se_node_acl *core_tpg_get_initiator_node_acl( { struct se_node_acl *acl; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_for_each_entry(acl, &tpg->acl_node_list, acl_list) { if (!strcmp(acl->initiatorname, initiatorname) && !acl->dynamic_node_acl) { - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return acl; } } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return NULL; } @@ -309,10 +309,10 @@ struct se_node_acl *core_tpg_check_initiator_node_acl( else core_tpg_add_node_to_devs(acl, tpg); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); tpg->num_node_acls++; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); pr_debug("%s_TPG[%u] - Added DYNAMIC ACL with TCQ Depth: %d for %s" " Initiator Node: %s\n", tpg->se_tpg_tfo->get_fabric_name(), @@ -362,7 +362,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( { struct se_node_acl *acl = NULL; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (acl) { if (acl->dynamic_node_acl) { @@ -370,7 +370,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( pr_debug("%s_TPG[%u] - Replacing dynamic ACL" " for %s\n", tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg), initiatorname); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); /* * Release the locally allocated struct se_node_acl * because * core_tpg_add_initiator_node_acl() returned @@ -386,10 +386,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( " Node %s already exists for TPG %u, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return ERR_PTR(-EEXIST); } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); if (!se_nacl) { pr_err("struct se_node_acl pointer is NULL\n"); @@ -426,10 +426,10 @@ struct se_node_acl *core_tpg_add_initiator_node_acl( return ERR_PTR(-EINVAL); } - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); list_add_tail(&acl->acl_list, &tpg->acl_node_list); tpg->num_node_acls++; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); done: pr_debug("%s_TPG[%hu] - Added ACL with TCQ Depth: %d for %s" @@ -453,14 +453,14 @@ int core_tpg_del_initiator_node_acl( struct se_session *sess, *sess_tmp; int dynamic_acl = 0; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } list_del(&acl->acl_list); tpg->num_node_acls--; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_bh(&tpg->session_lock); list_for_each_entry_safe(sess, sess_tmp, @@ -511,21 +511,21 @@ int core_tpg_set_initiator_node_queue_depth( struct se_node_acl *acl; int dynamic_acl = 0; - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); acl = __core_tpg_get_initiator_node_acl(tpg, initiatorname); if (!acl) { pr_err("Access Control List entry for %s Initiator" " Node %s does not exists for TPG %hu, ignoring" " request.\n", tpg->se_tpg_tfo->get_fabric_name(), initiatorname, tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -ENODEV; } if (acl->dynamic_node_acl) { acl->dynamic_node_acl = 0; dynamic_acl = 1; } - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); spin_lock_bh(&tpg->session_lock); list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) { @@ -541,10 +541,10 @@ int core_tpg_set_initiator_node_queue_depth( tpg->se_tpg_tfo->get_fabric_name(), initiatorname); spin_unlock_bh(&tpg->session_lock); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -EEXIST; } /* @@ -579,10 +579,10 @@ int core_tpg_set_initiator_node_queue_depth( if (init_sess) tpg->se_tpg_tfo->close_session(init_sess); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return -EINVAL; } spin_unlock_bh(&tpg->session_lock); @@ -598,10 +598,10 @@ int core_tpg_set_initiator_node_queue_depth( initiatorname, tpg->se_tpg_tfo->get_fabric_name(), tpg->se_tpg_tfo->tpg_get_tag(tpg)); - spin_lock_bh(&tpg->acl_node_lock); + spin_lock_irq(&tpg->acl_node_lock); if (dynamic_acl) acl->dynamic_node_acl = 1; - spin_unlock_bh(&tpg->acl_node_lock); + spin_unlock_irq(&tpg->acl_node_lock); return 0; } @@ -725,20 +725,20 @@ int core_tpg_deregister(struct se_portal_group *se_tpg) * not been released because of TFO->tpg_check_demo_mode_cache() == 1 * in transport_deregister_session(). */ - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); list_for_each_entry_safe(nacl, nacl_tmp, &se_tpg->acl_node_list, acl_list) { list_del(&nacl->acl_list); se_tpg->num_node_acls--; - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); core_tpg_wait_for_nacl_pr_ref(nacl); core_free_device_list_for_node(nacl, se_tpg); se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg, nacl); - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); if (se_tpg->se_tpg_type == TRANSPORT_TPG_TYPE_NORMAL) core_tpg_release_virtual_lun0(se_tpg); diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c index 520a8baae794..b15879d43e22 100644 --- a/drivers/target/tcm_fc/tfc_conf.c +++ b/drivers/target/tcm_fc/tfc_conf.c @@ -256,7 +256,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) struct se_portal_group *se_tpg = &tpg->se_tpg; struct se_node_acl *se_acl; - spin_lock_bh(&se_tpg->acl_node_lock); + spin_lock_irq(&se_tpg->acl_node_lock); list_for_each_entry(se_acl, &se_tpg->acl_node_list, acl_list) { acl = container_of(se_acl, struct ft_node_acl, se_node_acl); pr_debug("acl %p port_name %llx\n", @@ -270,7 +270,7 @@ struct ft_node_acl *ft_acl_get(struct ft_tpg *tpg, struct fc_rport_priv *rdata) break; } } - spin_unlock_bh(&se_tpg->acl_node_lock); + spin_unlock_irq(&se_tpg->acl_node_lock); return found; } -- cgit v1.2.3 From 259a187ade45056fd44856654f78aa9e9f0f7c75 Mon Sep 17 00:00:00 2001 From: Noah Watkins Date: Mon, 22 Aug 2011 13:49:41 -0600 Subject: ceph: fix memory leak kfree does not clean up indirect allocations in ceph_fs_client and ceph_options (e.g. snapdir_name). Signed-off-by: Noah Watkins Signed-off-by: Sage Weil --- fs/ceph/super.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index d47c5ec7fb1f..88bacaf385d9 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -813,8 +813,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, fsc = create_fs_client(fsopt, opt); if (IS_ERR(fsc)) { res = ERR_CAST(fsc); - kfree(fsopt); - kfree(opt); + destroy_mount_options(fsopt); + ceph_destroy_options(opt); goto out_final; } -- cgit v1.2.3 From 0e69d75ccb2f091757b38d4d6a2ed739e06b615e Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Tue, 16 Aug 2011 13:57:14 -0600 Subject: USB option driver add PID of Huawei Vodafone K3806 This patch adds the product ID of Huawei's Vodafone K3806 mobile broadband modem to option.c. This is necessary so that the driver gets loaded on demand without the intervention of usb_modeswitch. This has the benefit of it becoming available faster and also ensures that the option driver is not bound to a network interface that should be claimed by cdc_ether. Signed-off-by: Andrew Bird Signed-off-by: Alex Chiang Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 815656198914..6e042229e4be 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -148,6 +148,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_K4505 0x1464 #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_E14AC 0x14AC +#define HUAWEI_PRODUCT_K3806 0x14AE #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -551,6 +552,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, -- cgit v1.2.3 From 7e1805844da18a37e6d251d286f93c94b52d791e Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Tue, 16 Aug 2011 13:58:21 -0600 Subject: USB option driver add PID of Huawei Vodafone K4605 This patch adds the product ID of Huawei's Vodafone K4605 mobile broadband modem to option.c. This is necessary so that the driver gets loaded on demand without the intervention of usb_modeswitch. This has the benefit of it becoming available faster and also ensures that the option driver is not bound to a network interface that should be claimed by suitable network driver. Signed-off-by: Andrew Bird Signed-off-by: Alex Chiang Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 6e042229e4be..ab86e0dd7f33 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -149,6 +149,7 @@ static void option_instat_callback(struct urb *urb); #define HUAWEI_PRODUCT_K3765 0x1465 #define HUAWEI_PRODUCT_E14AC 0x14AC #define HUAWEI_PRODUCT_K3806 0x14AE +#define HUAWEI_PRODUCT_K4605 0x14C6 #define HUAWEI_PRODUCT_K3770 0x14C9 #define HUAWEI_PRODUCT_K3771 0x14CA #define HUAWEI_PRODUCT_K4510 0x14CB @@ -553,6 +554,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3806, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4605, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x31) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3770, 0xff, 0x02, 0x32) }, { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3771, 0xff, 0x02, 0x31) }, @@ -1136,10 +1138,11 @@ static int option_probe(struct usb_serial *serial, serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff) return -ENODEV; - /* Don't bind network interfaces on Huawei K3765 & K4505 */ + /* Don't bind network interfaces on Huawei K3765, K4505 & K4605 */ if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID && (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 || - serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) && + serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 || + serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) && serial->interface->cur_altsetting->desc.bInterfaceNumber == 1) return -ENODEV; -- cgit v1.2.3 From d0f2fb2500b1c5fe4967eb45d8c9bc758d7aef80 Mon Sep 17 00:00:00 2001 From: Wang Zhi Date: Wed, 17 Aug 2011 10:39:31 +0800 Subject: USB: EHCI: Do not rely on PORT_SUSPEND to stop USB resuming in ehci_bus_resume(). From EHCI Spec p.28 HC should clear PORT_SUSPEND when SW clears PORT_RESUME. In Intel Oaktrail platform, MPH (Multi-Port Host Controller) core clears PORT_SUSPEND directly when SW sets PORT_RESUME bit. If we rely on PORT_SUSPEND bit to stop USB resume, we will miss the action of clearing PORT_RESUME. This will cause unexpected long resume signal on USB bus. Signed-off-by: Wang Zhi Signed-off-by: Alan Stern Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-hub.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c index e051b30c1847..4c32cb19b405 100644 --- a/drivers/usb/host/ehci-hub.c +++ b/drivers/usb/host/ehci-hub.c @@ -343,7 +343,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) u32 temp; u32 power_okay; int i; - u8 resume_needed = 0; + unsigned long resume_needed = 0; if (time_before (jiffies, ehci->next_statechange)) msleep(5); @@ -416,7 +416,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) if (test_bit(i, &ehci->bus_suspended) && (temp & PORT_SUSPEND)) { temp |= PORT_RESUME; - resume_needed = 1; + set_bit(i, &resume_needed); } ehci_writel(ehci, temp, &ehci->regs->port_status [i]); } @@ -431,8 +431,7 @@ static int ehci_bus_resume (struct usb_hcd *hcd) i = HCS_N_PORTS (ehci->hcs_params); while (i--) { temp = ehci_readl(ehci, &ehci->regs->port_status [i]); - if (test_bit(i, &ehci->bus_suspended) && - (temp & PORT_SUSPEND)) { + if (test_bit(i, &resume_needed)) { temp &= ~(PORT_RWC_BITS | PORT_RESUME); ehci_writel(ehci, temp, &ehci->regs->port_status [i]); ehci_vdbg (ehci, "resumed port %d\n", i + 1); -- cgit v1.2.3 From e5d3d4463fb30998385f9e78ab3c7f63b5813000 Mon Sep 17 00:00:00 2001 From: Yulgon Kim Date: Thu, 18 Aug 2011 14:02:45 +0900 Subject: usb: s5p-ehci: fix a NULL pointer deference This patch fixes a NULL pointer deference. A NULL pointer dereference happens since s5p_ehci->hcd field is not initialized yet in probe function. [jg1.han@samsung.com: edit commit message] Signed-off-by: Yulgon Kim Signed-off-by: Jingoo Han Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-s5p.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/ehci-s5p.c b/drivers/usb/host/ehci-s5p.c index b3958b3d3163..9e77f1c8bdbd 100644 --- a/drivers/usb/host/ehci-s5p.c +++ b/drivers/usb/host/ehci-s5p.c @@ -86,6 +86,7 @@ static int __devinit s5p_ehci_probe(struct platform_device *pdev) goto fail_hcd; } + s5p_ehci->hcd = hcd; s5p_ehci->clk = clk_get(&pdev->dev, "usbhost"); if (IS_ERR(s5p_ehci->clk)) { -- cgit v1.2.3 From c6eb2d75ffcdfafa37ff010bf467de20d468ef79 Mon Sep 17 00:00:00 2001 From: "Gavin.zhu" Date: Mon, 22 Aug 2011 13:51:53 -0700 Subject: USB: option: add YUGA device id to driver Signed-off-by: Gavin.zhu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index ab86e0dd7f33..78452baca88b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -418,6 +418,56 @@ static void option_instat_callback(struct urb *urb); #define SAMSUNG_VENDOR_ID 0x04e8 #define SAMSUNG_PRODUCT_GT_B3730 0x6889 +/* YUGA products www.yuga-info.com*/ +#define YUGA_VENDOR_ID 0x257A +#define YUGA_PRODUCT_CEM600 0x1601 +#define YUGA_PRODUCT_CEM610 0x1602 +#define YUGA_PRODUCT_CEM500 0x1603 +#define YUGA_PRODUCT_CEM510 0x1604 +#define YUGA_PRODUCT_CEM800 0x1605 +#define YUGA_PRODUCT_CEM900 0x1606 + +#define YUGA_PRODUCT_CEU818 0x1607 +#define YUGA_PRODUCT_CEU816 0x1608 +#define YUGA_PRODUCT_CEU828 0x1609 +#define YUGA_PRODUCT_CEU826 0x160A +#define YUGA_PRODUCT_CEU518 0x160B +#define YUGA_PRODUCT_CEU516 0x160C +#define YUGA_PRODUCT_CEU528 0x160D +#define YUGA_PRODUCT_CEU526 0x160F + +#define YUGA_PRODUCT_CWM600 0x2601 +#define YUGA_PRODUCT_CWM610 0x2602 +#define YUGA_PRODUCT_CWM500 0x2603 +#define YUGA_PRODUCT_CWM510 0x2604 +#define YUGA_PRODUCT_CWM800 0x2605 +#define YUGA_PRODUCT_CWM900 0x2606 + +#define YUGA_PRODUCT_CWU718 0x2607 +#define YUGA_PRODUCT_CWU716 0x2608 +#define YUGA_PRODUCT_CWU728 0x2609 +#define YUGA_PRODUCT_CWU726 0x260A +#define YUGA_PRODUCT_CWU518 0x260B +#define YUGA_PRODUCT_CWU516 0x260C +#define YUGA_PRODUCT_CWU528 0x260D +#define YUGA_PRODUCT_CWU526 0x260F + +#define YUGA_PRODUCT_CLM600 0x2601 +#define YUGA_PRODUCT_CLM610 0x2602 +#define YUGA_PRODUCT_CLM500 0x2603 +#define YUGA_PRODUCT_CLM510 0x2604 +#define YUGA_PRODUCT_CLM800 0x2605 +#define YUGA_PRODUCT_CLM900 0x2606 + +#define YUGA_PRODUCT_CLU718 0x2607 +#define YUGA_PRODUCT_CLU716 0x2608 +#define YUGA_PRODUCT_CLU728 0x2609 +#define YUGA_PRODUCT_CLU726 0x260A +#define YUGA_PRODUCT_CLU518 0x260B +#define YUGA_PRODUCT_CLU516 0x260C +#define YUGA_PRODUCT_CLU528 0x260D +#define YUGA_PRODUCT_CLU526 0x260F + /* some devices interfaces need special handling due to a number of reasons */ enum option_blacklist_reason { OPTION_BLACKLIST_NONE = 0, @@ -1009,6 +1059,48 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(CELOT_VENDOR_ID, CELOT_PRODUCT_CT680M) }, /* CT-650 CDMA 450 1xEVDO modem */ { USB_DEVICE(ONDA_VENDOR_ID, ONDA_MT825UP) }, /* ONDA MT825UP modem */ { USB_DEVICE_AND_INTERFACE_INFO(SAMSUNG_VENDOR_ID, SAMSUNG_PRODUCT_GT_B3730, USB_CLASS_CDC_DATA, 0x00, 0x00) }, /* Samsung GT-B3730 LTE USB modem.*/ + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU818) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU816) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU828) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU826) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CEU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU718) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU716) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU728) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU726) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CWU526) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM600) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM610) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM500) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM510) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM800) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLM900) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU718) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU716) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU728) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU726) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU518) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU516) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU528) }, + { USB_DEVICE(YUGA_VENDOR_ID, YUGA_PRODUCT_CLU526) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, option_ids); -- cgit v1.2.3 From 6118514e8749105334f46ccec6faf9a439be6cf9 Mon Sep 17 00:00:00 2001 From: Andrew Bird Date: Wed, 17 Aug 2011 00:20:03 +0100 Subject: USB option driver K3765/K4505 avoid CDC_DATA interface Currently the Option driver avoids binding interface 1 on Huawei K3765 and K4505 broadband modems as it should be handled by the cdc_ether driver instead. This patch ensures we don't bind the interface 2 on those devices as that is CDC_DATA. Signed-off-by: Andrew Bird Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 78452baca88b..fe22e90bc879 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1235,7 +1235,8 @@ static int option_probe(struct usb_serial *serial, (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 || serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505 || serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4605) && - serial->interface->cur_altsetting->desc.bInterfaceNumber == 1) + (serial->interface->cur_altsetting->desc.bInterfaceNumber == 1 || + serial->interface->cur_altsetting->desc.bInterfaceNumber == 2)) return -ENODEV; /* Don't bind network interface on Samsung GT-B3730, it is handled by a separate module */ -- cgit v1.2.3 From 858a914324c7786f483661e3a89bc8fbe50f1b9d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 16 Aug 2011 08:15:26 -0700 Subject: hwmon: (ntc_thermistor) Simplify if sequence Replace unnecessary if with else statement. This fixes the following (false) compile warning reported with some combinations of C compiler version and configuration. drivers/hwmon/ntc_thermistor.c: In function 'ntc_show_temp': drivers/hwmon/ntc_thermistor.c:225: warning: 'low' may be used uninitialized in this function drivers/hwmon/ntc_thermistor.c:225: note: 'low' was declared here drivers/hwmon/ntc_thermistor.c:225: warning: 'high' may be used uninitialized in this function drivers/hwmon/ntc_thermistor.c:225: note: 'high' was declared here drivers/hwmon/ntc_thermistor.c:294: warning: 'temp' may be used uninitialized in this function Signed-off-by: Guenter Roeck Acked-by: Jean Delvare --- drivers/hwmon/ntc_thermistor.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/hwmon/ntc_thermistor.c b/drivers/hwmon/ntc_thermistor.c index d7926f4336b5..eab11615dced 100644 --- a/drivers/hwmon/ntc_thermistor.c +++ b/drivers/hwmon/ntc_thermistor.c @@ -211,8 +211,7 @@ static int lookup_comp(struct ntc_data *data, if (data->comp[mid].ohm <= ohm) { *i_low = mid; *i_high = mid - 1; - } - if (data->comp[mid].ohm > ohm) { + } else { *i_low = mid + 1; *i_high = mid; } -- cgit v1.2.3 From 8ea95e08711f504d83281e762ca65a849a89593e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 2 Aug 2011 10:08:34 +0200 Subject: pti: add missing CONFIG_PCI dependency allmodconfig compile fails on s390 because of the new PTI driver: drivers/misc/pti.c:407:3: error: implicit declaration of function 'pci_iounmap' drivers/misc/pti.c:410:3: error: implicit declaration of function 'pci_release_region' Add a 'depends on PCI' statement so it doesn't get compiled. Cc: J Freyensee Signed-off-by: Tracey Dent Acked-by: Randy Dunlap Signed-off-by: Sergei Trofimovich Signed-off-by: Heiko Carstens Signed-off-by: Greg Kroah-Hartman --- drivers/misc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 0a4d86c6c4a4..2d6423c2d193 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -146,6 +146,7 @@ config PHANTOM config INTEL_MID_PTI tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard" + depends on PCI default n help The PTI (Parallel Trace Interface) driver directs -- cgit v1.2.3 From 86ec67fd0a28c7f2b765e33aaf5b002d28c5f1fa Mon Sep 17 00:00:00 2001 From: H Hartley Sweeten Date: Fri, 12 Aug 2011 14:58:31 -0700 Subject: base/devres.c: quiet sparse noise about context imbalance devres_release_all and devres_release_group both aquire the lock &dev->devres_lock but the release of that lock is done in release_nodes. This results in sparse noise about context imbalance. Add a lock annotation to release_nodes to quiet this noise. Signed-off-by: H Hartley Sweeten Signed-off-by: Greg Kroah-Hartman --- drivers/base/devres.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/devres.c b/drivers/base/devres.c index cf7a0c788052..65cd74832450 100644 --- a/drivers/base/devres.c +++ b/drivers/base/devres.c @@ -397,6 +397,7 @@ static int remove_nodes(struct device *dev, static int release_nodes(struct device *dev, struct list_head *first, struct list_head *end, unsigned long flags) + __releases(&dev->devres_lock) { LIST_HEAD(todo); int cnt; -- cgit v1.2.3 From 5926cef26c72cd121266b000b8975e6373cbf2b3 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:30 -0500 Subject: drivers:misc: ti-st: avoid a misleading dbg msg Previously the private data of each protocol registered to use ST was used to determine whether the protocol was registered to use shared transport or otherwise. However, now a flag is_registered is maintained to identify whether a protocol intends to use ST. Upon closing of the UART the error message relevant to this lack of un-registration was misleading and this patch fixes that. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 54c91ffe4a91..c8e335db3451 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -717,7 +717,7 @@ static void st_tty_close(struct tty_struct *tty) */ spin_lock_irqsave(&st_gdata->lock, flags); for (i = ST_BT; i < ST_MAX_CHANNELS; i++) { - if (st_gdata->list[i] != NULL) + if (st_gdata->is_registered[i] == true) pr_err("%d not un-registered", i); st_gdata->list[i] = NULL; } -- cgit v1.2.3 From 0d7c5f2572ccfa7bf83292b1506926663f2d164a Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:31 -0500 Subject: drivers:misc:ti-st: platform hooks for chip states Certain platform specific or Host-WiLink Interface specific actions would be required to be taken when the chip is being enabled and after the chip is disabled such as configuration of the mux modes for the GPIO of host connected to the nshutdown of the chip or relinquishing UART after the chip is disabled. Similar actions can also be taken when the chip is in deep sleep or when the chip is awake. Performance enhancements such as configuring the host to run faster when chip is awake and slower when chip is asleep can also be made here. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 12 ++++++++++++ drivers/misc/ti-st/st_ll.c | 19 +++++++++++++++++++ include/linux/ti_wilink_st.h | 27 ++++++++++++++++++++++++++- 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 38fd2f04c07e..6884dd1c997b 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -434,11 +434,17 @@ long st_kim_start(void *kim_data) { long err = 0; long retry = POR_RETRY_COUNT; + struct ti_st_plat_data *pdata; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; pr_info(" %s", __func__); + pdata = kim_gdata->kim_pdev->dev.platform_data; do { + /* platform specific enabling code here */ + if (pdata->chip_enable) + pdata->chip_enable(kim_gdata); + /* Configure BT nShutdown to HIGH state */ gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); mdelay(5); /* FIXME: a proper toggle */ @@ -489,6 +495,8 @@ long st_kim_stop(void *kim_data) { long err = 0; struct kim_data_s *kim_gdata = (struct kim_data_s *)kim_data; + struct ti_st_plat_data *pdata = + kim_gdata->kim_pdev->dev.platform_data; INIT_COMPLETION(kim_gdata->ldisc_installed); @@ -515,6 +523,10 @@ long st_kim_stop(void *kim_data) gpio_set_value(kim_gdata->nshutdown, GPIO_HIGH); mdelay(1); gpio_set_value(kim_gdata->nshutdown, GPIO_LOW); + + /* platform specific disable */ + if (pdata->chip_disable) + pdata->chip_disable(kim_gdata); return err; } diff --git a/drivers/misc/ti-st/st_ll.c b/drivers/misc/ti-st/st_ll.c index 3f2495138855..1ff460a8e9c7 100644 --- a/drivers/misc/ti-st/st_ll.c +++ b/drivers/misc/ti-st/st_ll.c @@ -22,6 +22,7 @@ #define pr_fmt(fmt) "(stll) :" fmt #include #include +#include #include /**********************************************************************/ @@ -37,6 +38,9 @@ static void send_ll_cmd(struct st_data_s *st_data, static void ll_device_want_to_sleep(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + pr_debug("%s", __func__); /* sanity check */ if (st_data->ll_state != ST_LL_AWAKE) @@ -46,10 +50,19 @@ static void ll_device_want_to_sleep(struct st_data_s *st_data) send_ll_cmd(st_data, LL_SLEEP_ACK); /* update state */ st_data->ll_state = ST_LL_ASLEEP; + + /* communicate to platform about chip asleep */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_asleep(NULL); } static void ll_device_want_to_wakeup(struct st_data_s *st_data) { + struct kim_data_s *kim_data; + struct ti_st_plat_data *pdata; + /* diff actions in diff states */ switch (st_data->ll_state) { case ST_LL_ASLEEP: @@ -70,6 +83,12 @@ static void ll_device_want_to_wakeup(struct st_data_s *st_data) } /* update state */ st_data->ll_state = ST_LL_AWAKE; + + /* communicate to platform about chip wakeup */ + kim_data = st_data->kim_data; + pdata = kim_data->kim_pdev->dev.platform_data; + if (pdata->chip_asleep) + pdata->chip_awake(NULL); } /**********************************************************************/ diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index b004e557caa9..2ef4385da6bf 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -410,7 +410,28 @@ struct gps_event_hdr { u16 plen; } __attribute__ ((packed)); -/* platform data */ +/** + * struct ti_st_plat_data - platform data shared between ST driver and + * platform specific board file which adds the ST device. + * @nshutdown_gpio: Host's GPIO line to which chip's BT_EN is connected. + * @dev_name: The UART/TTY name to which chip is interfaced. (eg: /dev/ttyS1) + * @flow_cntrl: Should always be 1, since UART's CTS/RTS is used for PM + * purposes. + * @baud_rate: The baud rate supported by the Host UART controller, this will + * be shared across with the chip via a HCI VS command from User-Space Init + * Mgr application. + * @suspend: + * @resume: legacy PM routines hooked to platform specific board file, so as + * to take chip-host interface specific action. + * @chip_enable: + * @chip_disable: Platform/Interface specific mux mode setting, GPIO + * configuring, Host side PM disabling etc.. can be done here. + * @chip_asleep: + * @chip_awake: Chip specific deep sleep states is communicated to Host + * specific board-xx.c to take actions such as cut UART clocks when chip + * asleep or run host faster when chip awake etc.. + * + */ struct ti_st_plat_data { long nshutdown_gpio; unsigned char dev_name[UART_DEV_NAME_LEN]; /* uart name */ @@ -418,6 +439,10 @@ struct ti_st_plat_data { unsigned long baud_rate; int (*suspend)(struct platform_device *, pm_message_t); int (*resume)(struct platform_device *); + int (*chip_enable) (struct kim_data_s *); + int (*chip_disable) (struct kim_data_s *); + int (*chip_asleep) (struct kim_data_s *); + int (*chip_awake) (struct kim_data_s *); }; #endif /* TI_WILINK_ST_H */ -- cgit v1.2.3 From 74a4fcf19eed6550651f455db5741fd216b4f004 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:32 -0500 Subject: drivers:misc: ti-st: reinit completion on ver read After the version information has been read, the completion which assists in wait_for_completion during the firmware send/wait sequence is being re-used and hence this needs to be re-initialised for fool proof firmware download retries. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 6884dd1c997b..e5639ca97dce 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -210,6 +210,7 @@ static long read_local_version(struct kim_data_s *kim_gdata, char *bts_scr_name) pr_err(" waiting for ver info- timed out "); return -ETIMEDOUT; } + INIT_COMPLETION(kim_gdata->kim_rcvd); version = MAKEWORD(kim_gdata->resp_buffer[13], -- cgit v1.2.3 From 78bb9697e2c4b62c426f1a2571c293a2e4463adf Mon Sep 17 00:00:00 2001 From: Vijay Badawadagi Date: Wed, 10 Aug 2011 10:18:33 -0500 Subject: drivers:misc: ti-st: fail-safe on wrong pkt type Texas Instrument's shared transport driver interpret incoming data from the UART based on the various protocol drivers registered to the driver such as btwilink driver or FM or GPS driver which provide logical channel IDs. In case of bad-behavior from chip such as HCI Event response for a GPS command or a HCI Event (h/w error event) for a FM response & In case of bad-behavior from UART driver such as dropping data bytes a fail-safe is required to avoid kernel panic. Signed-off-by: Pavan Savoy Signed-off-by: Vijay Badawadagi Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index c8e335db3451..1f973ce3043f 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -338,6 +338,12 @@ void st_int_recv(void *disc_data, /* Unknow packet? */ default: type = *ptr; + if (st_gdata->list[type] == NULL) { + pr_err("chip/interface misbehavior dropping" + " frame starting with 0x%02x", type); + goto done; + + } st_gdata->rx_skb = alloc_skb( st_gdata->list[type]->max_frame_size, GFP_ATOMIC); @@ -354,6 +360,7 @@ void st_int_recv(void *disc_data, ptr++; count--; } +done: spin_unlock_irqrestore(&st_gdata->lock, flags); pr_debug("done %s", __func__); return; -- cgit v1.2.3 From 2f81a02ce0693863019dc3fcc532533af6dc0dcd Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:34 -0500 Subject: drivers:misc: ti-st: reinit completion before send download firmware behaves differently at different times, when logs are enabled and the system is loaded, the wait_for_completion is able to wait for every send, However during other times the wait does not happen. So, for reliability reinitializing the completion before every send, makes sure the wait happens for every send. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index e5639ca97dce..1748a9351de0 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -299,6 +299,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) switch (((struct bts_action *)ptr)->type) { case ACTION_SEND_COMMAND: /* action send */ + pr_debug("S"); action_ptr = &(((struct bts_action *)ptr)->data[0]); if (unlikely (((struct hci_command *)action_ptr)->opcode == @@ -336,6 +337,10 @@ static long download_firmware(struct kim_data_s *kim_gdata) release_firmware(kim_gdata->fw_entry); return -ETIMEDOUT; } + /* reinit completion before sending for the + * relevant wait + */ + INIT_COMPLETION(kim_gdata->kim_rcvd); /* * Free space found in uart buffer, call st_int_write @@ -362,6 +367,7 @@ static long download_firmware(struct kim_data_s *kim_gdata) } break; case ACTION_WAIT_EVENT: /* wait */ + pr_debug("W"); if (!wait_for_completion_timeout (&kim_gdata->kim_rcvd, msecs_to_jiffies(CMD_RESP_TIME))) { -- cgit v1.2.3 From d0344ef670d686628f369e649c86f71c90ebe222 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:35 -0500 Subject: drivers:misc: ti-st: wait for completion at fail When the line discipline install fails for reasons such as missing user-space UIM or broken communication between UIM and ST driver, then the ST attempts/retries to request for ldisc installation again. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index 1748a9351de0..d8ca4068a928 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -473,6 +473,12 @@ long st_kim_start(void *kim_data) pr_info("ldisc_install = 0"); sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + /* the following wait is never going to be completed, + * since the ldisc was never installed, hence serving + * as a mdelay of LDISC_TIME msecs */ + err = wait_for_completion_timeout + (&kim_gdata->ldisc_installed, + msecs_to_jiffies(LDISC_TIME)); err = -ETIMEDOUT; continue; } else { @@ -485,6 +491,13 @@ long st_kim_start(void *kim_data) pr_info("ldisc_install = 0"); sysfs_notify(&kim_gdata->kim_pdev->dev.kobj, NULL, "install"); + /* this wait might be completed, though in the + * tty_close() since the ldisc is already + * installed */ + err = wait_for_completion_timeout + (&kim_gdata->ldisc_installed, + msecs_to_jiffies(LDISC_TIME)); + err = -EINVAL; continue; } else { /* on success don't retry */ break; -- cgit v1.2.3 From 76ff0e64d42fac59fb756536342a3d3f3e4e8833 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:36 -0500 Subject: drivers:misc: ti-st: free skb on firmware download If during validation of the firmware download the data doesn't match what is expected out of the chip, this calls for a firmware download failure and a retry. Free the SKB which collects response during such scenarios. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_kim.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_kim.c b/drivers/misc/ti-st/st_kim.c index d8ca4068a928..3a3580566dfc 100644 --- a/drivers/misc/ti-st/st_kim.c +++ b/drivers/misc/ti-st/st_kim.c @@ -68,6 +68,7 @@ void validate_firmware_response(struct kim_data_s *kim_gdata) if (unlikely(skb->data[5] != 0)) { pr_err("no proper response during fw download"); pr_err("data6 %x", skb->data[5]); + kfree_skb(skb); return; /* keep waiting for the proper response */ } /* becos of all the script being downloaded */ -- cgit v1.2.3 From 651d62a8b0378b911f083a1712d9d228894f46d8 Mon Sep 17 00:00:00 2001 From: Pavan Savoy Date: Wed, 10 Aug 2011 10:18:37 -0500 Subject: drivers:misc: ti-st: fix unexpected UART close If suppose the UIM were to die and hence UART were to close when the Bluetooth/FM or GPS is turned on, prep the ST for a state where-in if the UIM comes back up, Bluetooth/FM/GPS can be turned on. Signed-off-by: Pavan Savoy Signed-off-by: Greg Kroah-Hartman --- drivers/misc/ti-st/st_core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ti-st/st_core.c b/drivers/misc/ti-st/st_core.c index 1f973ce3043f..ba168a7d54d4 100644 --- a/drivers/misc/ti-st/st_core.c +++ b/drivers/misc/ti-st/st_core.c @@ -727,6 +727,7 @@ static void st_tty_close(struct tty_struct *tty) if (st_gdata->is_registered[i] == true) pr_err("%d not un-registered", i); st_gdata->list[i] = NULL; + st_gdata->is_registered[i] = false; } st_gdata->protos_registered = 0; spin_unlock_irqrestore(&st_gdata->lock, flags); -- cgit v1.2.3 From 6c4b47d243112e98811ce0da7bbb32cc3857dd1a Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Wed, 20 Jul 2011 20:17:49 +0900 Subject: pch_uart: Set PCIe bus number using probe parameter Currently, PCIe bus number is set as fixed value "2". However, PCIe bus number is not always "2". This patch sets bus number using probe() parameter. Signed-off-by: Tomoya MORINAGA Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/pch_uart.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c index 846dfcd3ce0d..b46218d679e2 100644 --- a/drivers/tty/serial/pch_uart.c +++ b/drivers/tty/serial/pch_uart.c @@ -598,7 +598,8 @@ static void pch_request_dma(struct uart_port *port) dma_cap_zero(mask); dma_cap_set(DMA_SLAVE, mask); - dma_dev = pci_get_bus_and_slot(2, PCI_DEVFN(0xa, 0)); /* Get DMA's dev + dma_dev = pci_get_bus_and_slot(priv->pdev->bus->number, + PCI_DEVFN(0xa, 0)); /* Get DMA's dev information */ /* Set Tx DMA */ param = &priv->param_tx; -- cgit v1.2.3 From 181d5762bd8eaa2881b7df27bad260bf4abda1bc Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 4 Aug 2011 03:13:10 -0500 Subject: drivers/serial/ucc_uart.c: Fix compiler warning drivers/tty/serial/ucc_uart.c: In function 'qe2cpu_addr': drivers/tty/serial/ucc_uart.c:238:2: warning: format '%x' expects type 'unsigned int', but argument 3 has type 'dma_addr_t' Signed-off-by: Kumar Gala Acked-by: Timur Tabi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ucc_uart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c index c327218cad44..9af9f0879a24 100644 --- a/drivers/tty/serial/ucc_uart.c +++ b/drivers/tty/serial/ucc_uart.c @@ -235,7 +235,7 @@ static inline void *qe2cpu_addr(dma_addr_t addr, struct uart_qe_port *qe_port) return qe_port->bd_virt + (addr - qe_port->bd_dma_addr); /* something nasty happened */ - printk(KERN_ERR "%s: addr=%x\n", __func__, addr); + printk(KERN_ERR "%s: addr=%llx\n", __func__, (u64)addr); BUG(); return NULL; } -- cgit v1.2.3 From ab8ba3a2d2cba6a658ef596cd5b2e0905b6c8a9f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 16 Aug 2011 12:02:28 -0600 Subject: serial: 8250_pnp: add Intermec CV60 touchscreen device It would have been nice if Intermec had supplied a PNP0501 _CID for the COM3 device, but they didn't, so we have to recognize it explicitly. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=40612 CC: Jeff Chua Signed-off-by: Bjorn Helgaas Cc: stable Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pnp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/8250_pnp.c b/drivers/tty/serial/8250_pnp.c index fc301f6722e1..a2f236510ff1 100644 --- a/drivers/tty/serial/8250_pnp.c +++ b/drivers/tty/serial/8250_pnp.c @@ -109,6 +109,9 @@ static const struct pnp_device_id pnp_dev_table[] = { /* IBM */ /* IBM Thinkpad 701 Internal Modem Voice */ { "IBM0033", 0 }, + /* Intermec */ + /* Intermec CV60 touchscreen port */ + { "PNP4972", 0 }, /* Intertex */ /* Intertex 28k8 33k6 Voice EXT PnP */ { "IXDC801", 0 }, -- cgit v1.2.3 From 0d0a3cc183c50956fe1d9e37cca520debea93ad5 Mon Sep 17 00:00:00 2001 From: "Voss, Nikolaus" Date: Wed, 10 Aug 2011 14:02:29 +0200 Subject: atmel_serial: fix atmel_default_console_device reflect new static uart platform ids introduced by patch http://article.gmane.org/gmane.linux.kernel/1126105 Signed-off-by: Nikolaus Voss Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index af9b7814965a..b922f5d2e61e 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1609,9 +1609,11 @@ static struct console atmel_console = { static int __init atmel_console_init(void) { if (atmel_default_console_device) { - add_preferred_console(ATMEL_DEVICENAME, - atmel_default_console_device->id, NULL); - atmel_init_port(&atmel_ports[atmel_default_console_device->id], + struct atmel_uart_data *pdata = + atmel_default_console_device->dev.platform_data; + + add_preferred_console(ATMEL_DEVICENAME, pdata->num, NULL); + atmel_init_port(&atmel_ports[pdata->num], atmel_default_console_device); register_console(&atmel_console); } -- cgit v1.2.3 From b6bede3b4cdfbd188557ab50fceec2e91d295edf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 14 Aug 2011 17:13:00 +0000 Subject: xfs: fix tracing builds inside the source tree The code really requires the current source directory to be in the header search path. We already do this if building with an object tree separate from the source, but it needs to be added manually if building inside the source. The cflags addition for it accidentally got removed when collapsing the xfs directory structure. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Alex Elder --- fs/xfs/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index ffce328309b8..427a4e82a588 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,6 +16,8 @@ # Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA # +ccflags-y += -I$(src) # needed for trace events + ccflags-$(CONFIG_XFS_DEBUG) += -g obj-$(CONFIG_XFS_FS) += xfs.o -- cgit v1.2.3 From a2cc797d2d1a116b607de353de0ae1c2cab80b74 Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Mon, 22 Aug 2011 12:39:10 -0700 Subject: i915: do not setup intel_backlight twice The commit "Not all systems expose a firmware or platform mechanism for changing the backlight intensity on i915, so add native driver support" adds calls to intel_panel_setup_backlight() from intel_{lvds,dp}_init so do not call it again from intel_setup_outputs(). BugLink: http://bugs.launchpad.net/bugs/831542 Signed-off-by: Kamal Mostafa ACKed-by: Matthew Garrett Signed-off-by: Keith Packard --- drivers/gpu/drm/i915/intel_display.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ee1d701317f7..5a1ae9f06cb4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7238,8 +7238,6 @@ static void intel_setup_outputs(struct drm_device *dev) intel_encoder_clones(dev, encoder->clone_mask); } - intel_panel_setup_backlight(dev); - /* disable all the possible outputs/crtcs before entering KMS mode */ drm_helper_disable_unused_functions(dev); } -- cgit v1.2.3 From e21757a05730f03f18fbfc528a919e0205aa6a61 Mon Sep 17 00:00:00 2001 From: Paul Walmsley Date: Mon, 22 Aug 2011 16:13:00 -0600 Subject: OMAP3: clock: indicate that gpt12_fck and wdt1_fck are in the WKUP clockdomain The oscillator that supplies GPT12_FCLK and WDT1_FCLK exists in the WKUP powerdomain[1]. This resolves at least one boot-time warning: omap_hwmod: gpt12_fck: missing clockdomain for gpt12_fck. 1. _OMAP34xx Multimedia High Security (HS) Device Silicon Revision 3.1.x Security Addendum Version K (SWPU119K)_ Figure 3-29. August 2010. --- arch/arm/mach-omap2/clock3xxx_data.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-omap2/clock3xxx_data.c b/arch/arm/mach-omap2/clock3xxx_data.c index ffd55b1c4396..b9b844683147 100644 --- a/arch/arm/mach-omap2/clock3xxx_data.c +++ b/arch/arm/mach-omap2/clock3xxx_data.c @@ -3078,6 +3078,7 @@ static struct clk gpt12_fck = { .name = "gpt12_fck", .ops = &clkops_null, .parent = &secure_32k_fck, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; @@ -3085,6 +3086,7 @@ static struct clk wdt1_fck = { .name = "wdt1_fck", .ops = &clkops_null, .parent = &secure_32k_fck, + .clkdm_name = "wkup_clkdm", .recalc = &followparent_recalc, }; -- cgit v1.2.3 From 81a081fff7f3c144a0da9ee726906e533f66dd89 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Mon, 22 Aug 2011 09:22:41 -0500 Subject: sound/soc/fsl/fsl_dma.c: add missing of_node_put of_parse_phandle increments the reference count of np, so this should be decremented before trying the next possibility. Since we don't actually use np, we can decrement the reference count immediately. Reported-by: Julia Lawall Signed-off-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/fsl_dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/soc/fsl/fsl_dma.c b/sound/soc/fsl/fsl_dma.c index 732208c8c0b4..cb50598338e9 100644 --- a/sound/soc/fsl/fsl_dma.c +++ b/sound/soc/fsl/fsl_dma.c @@ -879,10 +879,12 @@ static struct device_node *find_ssi_node(struct device_node *dma_channel_np) * assume that device_node pointers are a valid comparison. */ np = of_parse_phandle(ssi_np, "fsl,playback-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; np = of_parse_phandle(ssi_np, "fsl,capture-dma", 0); + of_node_put(np); if (np == dma_channel_np) return ssi_np; } -- cgit v1.2.3 From 57cf9d4512c86a4a1b58857ca22b18bffbfd6812 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sat, 20 Aug 2011 11:03:44 +0800 Subject: ASoC: soc-core: use GFP_KERNEL flag for kmalloc in snd_soc_cnew GFP_ATOMIC is not needed here, use GFP_KERNEL instead. Signed-off-by: Axel Lin Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 83ad8ca27490..b085d8e87574 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1913,7 +1913,7 @@ struct snd_kcontrol *snd_soc_cnew(const struct snd_kcontrol_new *_template, if (prefix) { name_len = strlen(long_name) + strlen(prefix) + 2; - name = kmalloc(name_len, GFP_ATOMIC); + name = kmalloc(name_len, GFP_KERNEL); if (!name) return NULL; -- cgit v1.2.3 From 96101bd0bf7974686f6875c065a7a9a83cd2107a Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:38 +0200 Subject: sound/soc/kirkwood/kirkwood-i2s.c: add missing kfree Adjust the goto to jump to the error handling code that includes kfree. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/kirkwood/kirkwood-i2s.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/kirkwood/kirkwood-i2s.c b/sound/soc/kirkwood/kirkwood-i2s.c index a33fc51f363b..8f16cd37c2af 100644 --- a/sound/soc/kirkwood/kirkwood-i2s.c +++ b/sound/soc/kirkwood/kirkwood-i2s.c @@ -424,7 +424,7 @@ static __devinit int kirkwood_i2s_dev_probe(struct platform_device *pdev) if (!priv->mem) { dev_err(&pdev->dev, "request_mem_region failed\n"); err = -EBUSY; - goto error; + goto error_alloc; } priv->io = ioremap(priv->mem->start, SZ_16K); -- cgit v1.2.3 From 5006b313283c56cfda498513b7091692bcd74433 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 08:12:39 +0200 Subject: sound/soc/ep93xx/ep93xx-i2s.c: add missing kfree Introduce a new label that includes kfree and jump to that one. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != kfree(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 kfree(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Alexander Sverdlin Reviewed-by: H Hartley Sweeten Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/ep93xx/ep93xx-i2s.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/ep93xx/ep93xx-i2s.c b/sound/soc/ep93xx/ep93xx-i2s.c index 56efa0c1c9a9..099614e16651 100644 --- a/sound/soc/ep93xx/ep93xx-i2s.c +++ b/sound/soc/ep93xx/ep93xx-i2s.c @@ -385,14 +385,14 @@ static int ep93xx_i2s_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) { err = -ENODEV; - goto fail; + goto fail_free_info; } info->mem = request_mem_region(res->start, resource_size(res), pdev->name); if (!info->mem) { err = -EBUSY; - goto fail; + goto fail_free_info; } info->regs = ioremap(info->mem->start, resource_size(info->mem)); @@ -435,6 +435,7 @@ fail_unmap_mem: iounmap(info->regs); fail_release_mem: release_mem_region(info->mem->start, resource_size(info->mem)); +fail_free_info: kfree(info); fail: return err; -- cgit v1.2.3 From 178b279b645a14ca8ea01e4ea818c88681a31b07 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 09:02:00 +0200 Subject: sound/soc/fsl/p1022_ds.c: add missing of_node_put dma_channel_np has been accessed at this point, so decrease its reference count before leaving the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != of_node_put(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 of_node_put(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/p1022_ds.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/fsl/p1022_ds.c b/sound/soc/fsl/p1022_ds.c index 8fa4d5f8eda1..fcb862eb0c73 100644 --- a/sound/soc/fsl/p1022_ds.c +++ b/sound/soc/fsl/p1022_ds.c @@ -297,8 +297,10 @@ static int get_dma_channel(struct device_node *ssi_np, * dai->platform name should already point to an allocated buffer. */ ret = of_address_to_resource(dma_channel_np, 0, &res); - if (ret) + if (ret) { + of_node_put(dma_channel_np); return ret; + } snprintf((char *)dai->platform_name, DAI_NAME_SIZE, "%llx.%s", (unsigned long long) res.start, dma_channel_np->name); -- cgit v1.2.3 From c09f5ca7bdc9a82c5f721bc28c46d65452240cfa Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 20 Aug 2011 09:02:01 +0200 Subject: sound/soc/fsl/mpc8610_hpcd.c: add missing of_node_put The first change is to add an of_node_put, since codec_np has previously been allocated. The rest of the patch reorganizes the error handling code so the only code executed is that which is needed. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ identifier x; expression E1!=0,E2,E3,E4; statement S; iterator I; @@ ( if (...) { ... when != of_node_put(x) when != x = E3 when != E3 = x * return ...; } ... when != x = E2 when != I(...,x,...) S if (...) { ... when != x = E4 of_node_put(x); ... return ...; } ) // Signed-off-by: Julia Lawall Acked-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/fsl/mpc8610_hpcd.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sound/soc/fsl/mpc8610_hpcd.c b/sound/soc/fsl/mpc8610_hpcd.c index a19297959587..358f0baaf71b 100644 --- a/sound/soc/fsl/mpc8610_hpcd.c +++ b/sound/soc/fsl/mpc8610_hpcd.c @@ -345,8 +345,10 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) } machine_data = kzalloc(sizeof(struct mpc8610_hpcd_data), GFP_KERNEL); - if (!machine_data) - return -ENOMEM; + if (!machine_data) { + ret = -ENOMEM; + goto error_alloc; + } machine_data->dai[0].cpu_dai_name = dev_name(&ssi_pdev->dev); machine_data->dai[0].ops = &mpc8610_hpcd_ops; @@ -494,7 +496,7 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) ret = platform_device_add(sound_device); if (ret) { dev_err(&pdev->dev, "platform device add failed\n"); - goto error; + goto error_sound; } dev_set_drvdata(&pdev->dev, sound_device); @@ -502,14 +504,12 @@ static int mpc8610_hpcd_probe(struct platform_device *pdev) return 0; +error_sound: + platform_device_unregister(sound_device); error: - of_node_put(codec_np); - - if (sound_device) - platform_device_unregister(sound_device); - kfree(machine_data); - +error_alloc: + of_node_put(codec_np); return ret; } -- cgit v1.2.3 From 0278ccd9d53e07c4e699432b2fed9de6c56f506c Mon Sep 17 00:00:00 2001 From: Chris Boot Date: Mon, 22 Aug 2011 21:38:38 +0100 Subject: firewire: sbp2: fix panic after rmmod with slow targets If firewire-sbp2 starts a login to a target that doesn't complete ORBs in a timely manner (and has to retry the login), and the module is removed before the operation times out, you end up with a null-pointer dereference and a kernel panic. [SR: This happens because sbp2_target_get/put() do not maintain module references. scsi_device_get/put() do, but at occasions like Chris describes one, nobody holds a reference to an SBP-2 sdev.] This patch cancels pending work for each unit in sbp2_remove(), which hopefully means there are no extra references around that prevent us from unloading. This fixes my crash. Signed-off-by: Chris Boot Signed-off-by: Stefan Richter --- drivers/firewire/sbp2.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/firewire/sbp2.c b/drivers/firewire/sbp2.c index 41841a3e3f99..17cef864506a 100644 --- a/drivers/firewire/sbp2.c +++ b/drivers/firewire/sbp2.c @@ -1198,6 +1198,10 @@ static int sbp2_remove(struct device *dev) { struct fw_unit *unit = fw_unit(dev); struct sbp2_target *tgt = dev_get_drvdata(&unit->device); + struct sbp2_logical_unit *lu; + + list_for_each_entry(lu, &tgt->lu_list, link) + cancel_delayed_work_sync(&lu->work); sbp2_target_put(tgt); return 0; -- cgit v1.2.3 From 11f3a6bdc2528d1ce2af50202dbf7138fdee1b34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 06:05:59 +0000 Subject: bridge: fix a possible net_device leak Jan Beulich reported a possible net_device leak in bridge code after commit bb900b27a2f4 (bridge: allow creating bridge devices with netlink) Reported-by: Jan Beulich Signed-off-by: Eric Dumazet Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 2cdf0070419f..e73815456adf 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -231,6 +231,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; + int res; dev = alloc_netdev(sizeof(struct net_bridge), name, br_dev_setup); @@ -240,7 +241,10 @@ int br_add_bridge(struct net *net, const char *name) dev_net_set(dev, net); - return register_netdev(dev); + res = register_netdev(dev); + if (res) + free_netdev(dev); + return res; } int br_del_bridge(struct net *net, const char *name) -- cgit v1.2.3 From f5e4282586dc0c9dab8c7d32e6c43aa07f68586b Mon Sep 17 00:00:00 2001 From: Jeremiah Matthey Date: Tue, 23 Aug 2011 09:44:30 +0200 Subject: HID: usbhid: Add support for SiGma Micro chip Patch to add SiGma Micro-based keyboards (1c4f:0002) to hid-quirks. These keyboards dont seem to allow the records to be initialized, and hence a timeout occurs when the usbhid driver attempts to initialize them. The patch just adds the signature for these keyboards to the hid-quirks list with the setting HID_QUIRK_NO_INIT_REPORTS. This removes the 5-10 second wait for the timeout to occur. Signed-off-by: Jeremiah Matthey Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 3 +++ drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 61c880939f56..7d27d2b0445a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -579,6 +579,9 @@ #define USB_DEVICE_ID_SAMSUNG_IR_REMOTE 0x0001 #define USB_DEVICE_ID_SAMSUNG_WIRELESS_KBD_MOUSE 0x0600 +#define USB_VENDOR_ID_SIGMA_MICRO 0x1c4f +#define USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD 0x0002 + #define USB_VENDOR_ID_SKYCABLE 0x1223 #define USB_DEVICE_ID_SKYCABLE_WIRELESS_PRESENTER 0x3F07 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 621959d5cc42..4bdb5d46c52c 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -89,6 +89,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_MULTI_TOUCH, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_SIGMA_MICRO, USB_DEVICE_ID_SIGMA_MICRO_KEYBOARD, HID_QUIRK_NO_INIT_REPORTS }, { 0, 0 } }; -- cgit v1.2.3 From 7c4c3960dff109bc5db4c35da481c212dadb5eb5 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Mon, 22 Aug 2011 21:17:57 +0000 Subject: drm/ttm: fix ttm_bo_add_ttm(user) failure path ttm_tt_destroy kfrees passed object, so we need to nullify a reference to it. Signed-off-by: Marcin Slusarz Cc: stable@kernel.org Reviewed-by: Thomas Hellstrom Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 56619f64b6bf..384116afe5b7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -353,8 +353,10 @@ static int ttm_bo_add_ttm(struct ttm_buffer_object *bo, bool zero_alloc) ret = ttm_tt_set_user(bo->ttm, current, bo->buffer_start, bo->num_pages); - if (unlikely(ret != 0)) + if (unlikely(ret != 0)) { ttm_tt_destroy(bo->ttm); + bo->ttm = NULL; + } break; default: printk(KERN_ERR TTM_PFX "Illegal buffer object type\n"); -- cgit v1.2.3 From eac2095398668f989a3dd8d00be1b87850d78c01 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 22 Aug 2011 03:15:04 +0000 Subject: drm/ttm: unbind ttm before destroying node in accel move cleanup Nouveau makes the assumption that if a TTM is bound there will be a mm_node around for it and the backwards ordering here resulted in a use-after-free on some eviction paths. Signed-off-by: Ben Skeggs Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 77dbf408c0d0..ae3c6f5dd2b7 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -635,13 +635,13 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo, if (ret) return ret; - ttm_bo_free_old_node(bo); if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm != NULL)) { ttm_tt_unbind(bo->ttm); ttm_tt_destroy(bo->ttm); bo->ttm = NULL; } + ttm_bo_free_old_node(bo); } else { /** * This should help pipeline ordinary buffer moves. -- cgit v1.2.3 From 8d3bb23609d4ae22803a15d232289fc09a7b61c4 Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Mon, 22 Aug 2011 03:15:05 +0000 Subject: drm/ttm: ensure ttm for new node is bound before calling move_notify() This was true for new TTM_PL_SYSTEM and new TTM_PL_TT cases, but wasn't the case on TTM_PL_SYSTEM<->TTM_PL_TT moves, which causes trouble on some paths as nouveau's move_notify() hook requires that the dma addresses be valid at this point. Signed-off-by: Ben Skeggs Signed-off-by: Dave Airlie --- drivers/gpu/drm/ttm/ttm_bo.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 384116afe5b7..a4d38d85909a 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -392,10 +392,12 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo, * Create and bind a ttm if required. */ - if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && (bo->ttm == NULL)) { - ret = ttm_bo_add_ttm(bo, false); - if (ret) - goto out_err; + if (!(new_man->flags & TTM_MEMTYPE_FLAG_FIXED)) { + if (bo->ttm == NULL) { + ret = ttm_bo_add_ttm(bo, false); + if (ret) + goto out_err; + } ret = ttm_tt_set_placement_caching(bo->ttm, mem->placement); if (ret) -- cgit v1.2.3 From 3989ef6cfb80825af2f7933415797f052817ac3e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:20 +0200 Subject: HID: wiimote: Simplify synchronization The new locking scheme in HID core allows us to remove a bit of synchronization. Since the HID layer acts synchronously we simply register input core last and there are no synchonization issues anymore. Also register sysfs files after that to simplify the code. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 78 ++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index a594383ce03d..8a68bf515cad 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -10,7 +10,6 @@ * any later version. */ -#include #include #include #include @@ -33,7 +32,6 @@ struct wiimote_state { }; struct wiimote_data { - atomic_t ready; struct hid_device *hdev; struct input_dev *input; @@ -200,9 +198,6 @@ static ssize_t wiifs_led_show_##num(struct device *dev, \ unsigned long flags; \ int state; \ \ - if (!atomic_read(&wdata->ready)) \ - return -EBUSY; \ - \ spin_lock_irqsave(&wdata->state.lock, flags); \ state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num); \ spin_unlock_irqrestore(&wdata->state.lock, flags); \ @@ -217,9 +212,6 @@ static ssize_t wiifs_led_set_##num(struct device *dev, \ unsigned long flags; \ __u8 state; \ \ - if (!atomic_read(&wdata->ready)) \ - return -EBUSY; \ - \ spin_lock_irqsave(&wdata->state.lock, flags); \ \ state = wdata->state.flags; \ @@ -244,13 +236,6 @@ wiifs_led_show_set(4); static int wiimote_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { - struct wiimote_data *wdata = input_get_drvdata(dev); - - if (!atomic_read(&wdata->ready)) - return -EBUSY; - /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */ - smp_rmb(); - return 0; } @@ -300,11 +285,6 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report, int i; unsigned long flags; - if (!atomic_read(&wdata->ready)) - return -EBUSY; - /* smp_rmb: Make sure wdata->xy is available when wdata->ready is 1 */ - smp_rmb(); - if (size < 1) return -EINVAL; @@ -362,6 +342,15 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) static void wiimote_destroy(struct wiimote_data *wdata) { + device_remove_file(&wdata->hdev->dev, &dev_attr_led1); + device_remove_file(&wdata->hdev->dev, &dev_attr_led2); + device_remove_file(&wdata->hdev->dev, &dev_attr_led3); + device_remove_file(&wdata->hdev->dev, &dev_attr_led4); + + input_unregister_device(wdata->input); + cancel_work_sync(&wdata->worker); + hid_hw_stop(wdata->hdev); + kfree(wdata); } @@ -377,19 +366,6 @@ static int wiimote_hid_probe(struct hid_device *hdev, return -ENOMEM; } - ret = device_create_file(&hdev->dev, &dev_attr_led1); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led2); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led3); - if (ret) - goto err; - ret = device_create_file(&hdev->dev, &dev_attr_led4); - if (ret) - goto err; - ret = hid_parse(hdev); if (ret) { hid_err(hdev, "HID parse failed\n"); @@ -408,9 +384,19 @@ static int wiimote_hid_probe(struct hid_device *hdev, goto err_stop; } - /* smp_wmb: Write wdata->xy first before wdata->ready is set to 1 */ - smp_wmb(); - atomic_set(&wdata->ready, 1); + ret = device_create_file(&hdev->dev, &dev_attr_led1); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led2); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led3); + if (ret) + goto err_free; + ret = device_create_file(&hdev->dev, &dev_attr_led4); + if (ret) + goto err_free; + hid_info(hdev, "New device registered\n"); /* by default set led1 after device initialization */ @@ -420,15 +406,15 @@ static int wiimote_hid_probe(struct hid_device *hdev, return 0; +err_free: + wiimote_destroy(wdata); + return ret; + err_stop: hid_hw_stop(hdev); err: input_free_device(wdata->input); - device_remove_file(&hdev->dev, &dev_attr_led1); - device_remove_file(&hdev->dev, &dev_attr_led2); - device_remove_file(&hdev->dev, &dev_attr_led3); - device_remove_file(&hdev->dev, &dev_attr_led4); - wiimote_destroy(wdata); + kfree(wdata); return ret; } @@ -437,16 +423,6 @@ static void wiimote_hid_remove(struct hid_device *hdev) struct wiimote_data *wdata = hid_get_drvdata(hdev); hid_info(hdev, "Device removed\n"); - - device_remove_file(&hdev->dev, &dev_attr_led1); - device_remove_file(&hdev->dev, &dev_attr_led2); - device_remove_file(&hdev->dev, &dev_attr_led3); - device_remove_file(&hdev->dev, &dev_attr_led4); - - hid_hw_stop(hdev); - input_unregister_device(wdata->input); - - cancel_work_sync(&wdata->worker); wiimote_destroy(wdata); } -- cgit v1.2.3 From 26af17484a737aaa991a7ce578cb15809a582fbc Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:21 +0200 Subject: HID: wiimote: Correctly call HID open/close callbacks Even though the bluetooth hid backend does not react on open/close callbacks, we should call them to be consistent with other hid drivers. Also the new input open/close handlers will be used in future to prepare the wiimote device for IR/extension input. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 8a68bf515cad..d49f67c7a00c 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -239,6 +239,20 @@ static int wiimote_input_event(struct input_dev *dev, unsigned int type, return 0; } +static int wiimote_input_open(struct input_dev *dev) +{ + struct wiimote_data *wdata = input_get_drvdata(dev); + + return hid_hw_open(wdata->hdev); +} + +static void wiimote_input_close(struct input_dev *dev) +{ + struct wiimote_data *wdata = input_get_drvdata(dev); + + hid_hw_close(wdata->hdev); +} + static void handler_keys(struct wiimote_data *wdata, const __u8 *payload) { input_report_key(wdata->input, wiiproto_keymap[WIIPROTO_KEY_LEFT], @@ -321,6 +335,8 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) input_set_drvdata(wdata->input, wdata); wdata->input->event = wiimote_input_event; + wdata->input->open = wiimote_input_open; + wdata->input->close = wiimote_input_close; wdata->input->dev.parent = &wdata->hdev->dev; wdata->input->id.bustype = wdata->hdev->bus; wdata->input->id.vendor = wdata->hdev->vendor; -- cgit v1.2.3 From 23a5a4a39eddbe515a832767a371cc54e82cc25e Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:22 +0200 Subject: HID: wiimote: Register led class devices This registers 4 led devices to allow controlling the wiimote leds via standard LED sysfs API. It removes the four sysfs attributes so we don't have two APIs for one device. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/Kconfig | 1 + drivers/hid/hid-wiimote.c | 165 +++++++++++++++++++++++++++++----------------- 2 files changed, 107 insertions(+), 59 deletions(-) diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index 306b15f39c9c..1130a8987125 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -589,6 +589,7 @@ config HID_WACOM_POWER_SUPPLY config HID_WIIMOTE tristate "Nintendo Wii Remote support" depends on BT_HIDP + depends on LEDS_CLASS ---help--- Support for the Nintendo Wii Remote bluetooth device. diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index d49f67c7a00c..29edd55d4bb0 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "hid-ids.h" @@ -34,6 +35,7 @@ struct wiimote_state { struct wiimote_data { struct hid_device *hdev; struct input_dev *input; + struct led_classdev *leds[4]; spinlock_t qlock; __u8 head; @@ -51,6 +53,9 @@ struct wiimote_data { #define WIIPROTO_FLAGS_LEDS (WIIPROTO_FLAG_LED1 | WIIPROTO_FLAG_LED2 | \ WIIPROTO_FLAG_LED3 | WIIPROTO_FLAG_LED4) +/* return flag for led \num */ +#define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1)) + enum wiiproto_reqs { WIIPROTO_REQ_LED = 0x11, WIIPROTO_REQ_DRM_K = 0x30, @@ -85,9 +90,6 @@ static __u16 wiiproto_keymap[] = { BTN_MODE, /* WIIPROTO_KEY_HOME */ }; -#define dev_to_wii(pdev) hid_get_drvdata(container_of(pdev, struct hid_device, \ - dev)) - static ssize_t wiimote_hid_send(struct hid_device *hdev, __u8 *buffer, size_t count) { @@ -190,48 +192,53 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds) wiimote_queue(wdata, cmd, sizeof(cmd)); } -#define wiifs_led_show_set(num) \ -static ssize_t wiifs_led_show_##num(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct wiimote_data *wdata = dev_to_wii(dev); \ - unsigned long flags; \ - int state; \ - \ - spin_lock_irqsave(&wdata->state.lock, flags); \ - state = !!(wdata->state.flags & WIIPROTO_FLAG_LED##num); \ - spin_unlock_irqrestore(&wdata->state.lock, flags); \ - \ - return sprintf(buf, "%d\n", state); \ -} \ -static ssize_t wiifs_led_set_##num(struct device *dev, \ - struct device_attribute *attr, const char *buf, size_t count) \ -{ \ - struct wiimote_data *wdata = dev_to_wii(dev); \ - int tmp = simple_strtoul(buf, NULL, 10); \ - unsigned long flags; \ - __u8 state; \ - \ - spin_lock_irqsave(&wdata->state.lock, flags); \ - \ - state = wdata->state.flags; \ - \ - if (tmp) \ - wiiproto_req_leds(wdata, state | WIIPROTO_FLAG_LED##num);\ - else \ - wiiproto_req_leds(wdata, state & ~WIIPROTO_FLAG_LED##num);\ - \ - spin_unlock_irqrestore(&wdata->state.lock, flags); \ - \ - return count; \ -} \ -static DEVICE_ATTR(led##num, S_IRUGO | S_IWUSR, wiifs_led_show_##num, \ - wiifs_led_set_##num) - -wiifs_led_show_set(1); -wiifs_led_show_set(2); -wiifs_led_show_set(3); -wiifs_led_show_set(4); +static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev) +{ + struct wiimote_data *wdata; + struct device *dev = led_dev->dev->parent; + int i; + unsigned long flags; + bool value = false; + + wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev)); + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i] == led_dev) { + spin_lock_irqsave(&wdata->state.lock, flags); + value = wdata->state.flags & WIIPROTO_FLAG_LED(i + 1); + spin_unlock_irqrestore(&wdata->state.lock, flags); + break; + } + } + + return value ? LED_FULL : LED_OFF; +} + +static void wiimote_leds_set(struct led_classdev *led_dev, + enum led_brightness value) +{ + struct wiimote_data *wdata; + struct device *dev = led_dev->dev->parent; + int i; + unsigned long flags; + __u8 state, flag; + + wdata = hid_get_drvdata(container_of(dev, struct hid_device, dev)); + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i] == led_dev) { + flag = WIIPROTO_FLAG_LED(i + 1); + spin_lock_irqsave(&wdata->state.lock, flags); + state = wdata->state.flags; + if (value == LED_OFF) + wiiproto_req_leds(wdata, state & ~flag); + else + wiiproto_req_leds(wdata, state | flag); + spin_unlock_irqrestore(&wdata->state.lock, flags); + break; + } + } +} static int wiimote_input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) @@ -315,6 +322,58 @@ static int wiimote_hid_event(struct hid_device *hdev, struct hid_report *report, return 0; } +static void wiimote_leds_destroy(struct wiimote_data *wdata) +{ + int i; + struct led_classdev *led; + + for (i = 0; i < 4; ++i) { + if (wdata->leds[i]) { + led = wdata->leds[i]; + wdata->leds[i] = NULL; + led_classdev_unregister(led); + kfree(led); + } + } +} + +static int wiimote_leds_create(struct wiimote_data *wdata) +{ + int i, ret; + struct device *dev = &wdata->hdev->dev; + size_t namesz = strlen(dev_name(dev)) + 9; + struct led_classdev *led; + char *name; + + for (i = 0; i < 4; ++i) { + led = kzalloc(sizeof(struct led_classdev) + namesz, GFP_KERNEL); + if (!led) { + ret = -ENOMEM; + goto err; + } + name = (void*)&led[1]; + snprintf(name, namesz, "%s:blue:p%d", dev_name(dev), i); + led->name = name; + led->brightness = 0; + led->max_brightness = 1; + led->brightness_get = wiimote_leds_get; + led->brightness_set = wiimote_leds_set; + + ret = led_classdev_register(dev, led); + if (ret) { + kfree(led); + goto err; + } + wdata->leds[i] = led; + } + + return 0; + +err: + wiimote_leds_destroy(wdata); + return ret; +} + static struct wiimote_data *wiimote_create(struct hid_device *hdev) { struct wiimote_data *wdata; @@ -358,10 +417,7 @@ static struct wiimote_data *wiimote_create(struct hid_device *hdev) static void wiimote_destroy(struct wiimote_data *wdata) { - device_remove_file(&wdata->hdev->dev, &dev_attr_led1); - device_remove_file(&wdata->hdev->dev, &dev_attr_led2); - device_remove_file(&wdata->hdev->dev, &dev_attr_led3); - device_remove_file(&wdata->hdev->dev, &dev_attr_led4); + wiimote_leds_destroy(wdata); input_unregister_device(wdata->input); cancel_work_sync(&wdata->worker); @@ -400,16 +456,7 @@ static int wiimote_hid_probe(struct hid_device *hdev, goto err_stop; } - ret = device_create_file(&hdev->dev, &dev_attr_led1); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led2); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led3); - if (ret) - goto err_free; - ret = device_create_file(&hdev->dev, &dev_attr_led4); + ret = wiimote_leds_create(wdata); if (ret) goto err_free; -- cgit v1.2.3 From 2cb5e4bc530471e9596cd32390bf70c8ada13d9a Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:23 +0200 Subject: HID: wiimote: Add drm request The wiimote reports data in several data reporting modes (DRM). The DRM request makes the wiimote send data in the requested drm. The DRM mode can be set explicitely or can be chosen by the driver. To let the driver choose the DRM mode, pass WIIPROTO_REQ_NULL placeholder to it. This is no valid request and is replaced with an appropriate DRM. Currently, the driver always sets the basic DRM_K mode, but this will be extended when further peripherals like accelerometer and IR are supported. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 29edd55d4bb0..84c9eb9c8e0d 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -57,7 +57,9 @@ struct wiimote_data { #define WIIPROTO_FLAG_LED(num) (WIIPROTO_FLAG_LED1 << (num - 1)) enum wiiproto_reqs { + WIIPROTO_REQ_NULL = 0x0, WIIPROTO_REQ_LED = 0x11, + WIIPROTO_REQ_DRM = 0x12, WIIPROTO_REQ_DRM_K = 0x30, }; @@ -192,6 +194,30 @@ static void wiiproto_req_leds(struct wiimote_data *wdata, int leds) wiimote_queue(wdata, cmd, sizeof(cmd)); } +/* + * Check what peripherals of the wiimote are currently + * active and select a proper DRM that supports all of + * the requested data inputs. + */ +static __u8 select_drm(struct wiimote_data *wdata) +{ + return WIIPROTO_REQ_DRM_K; +} + +static void wiiproto_req_drm(struct wiimote_data *wdata, __u8 drm) +{ + __u8 cmd[3]; + + if (drm == WIIPROTO_REQ_NULL) + drm = select_drm(wdata); + + cmd[0] = WIIPROTO_REQ_DRM; + cmd[1] = 0; + cmd[2] = drm; + + wiimote_queue(wdata, cmd, sizeof(cmd)); +} + static enum led_brightness wiimote_leds_get(struct led_classdev *led_dev) { struct wiimote_data *wdata; -- cgit v1.2.3 From c87019e41d61f3f972bd2f6a2380fc9896e4ab74 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Wed, 17 Aug 2011 11:43:24 +0200 Subject: HID: wiimote: Add status and return request handlers The wiimote resets the current drm when an extension is plugged in. Fortunately, it also sends a status report in this situation so we just reset the drm on every status report to keep the drm consistent. Also handle return reports from the wiimote which indicate success and failure of requests that we've sent. Signed-off-by: David Herrmann Signed-off-by: Jiri Kosina --- drivers/hid/hid-wiimote.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/drivers/hid/hid-wiimote.c b/drivers/hid/hid-wiimote.c index 84c9eb9c8e0d..85a02e5f9fe8 100644 --- a/drivers/hid/hid-wiimote.c +++ b/drivers/hid/hid-wiimote.c @@ -60,6 +60,8 @@ enum wiiproto_reqs { WIIPROTO_REQ_NULL = 0x0, WIIPROTO_REQ_LED = 0x11, WIIPROTO_REQ_DRM = 0x12, + WIIPROTO_REQ_STATUS = 0x20, + WIIPROTO_REQ_RETURN = 0x22, WIIPROTO_REQ_DRM_K = 0x30, }; @@ -313,6 +315,26 @@ static void handler_keys(struct wiimote_data *wdata, const __u8 *payload) input_sync(wdata->input); } +static void handler_status(struct wiimote_data *wdata, const __u8 *payload) +{ + handler_keys(wdata, payload); + + /* on status reports the drm is reset so we need to resend the drm */ + wiiproto_req_drm(wdata, WIIPROTO_REQ_NULL); +} + +static void handler_return(struct wiimote_data *wdata, const __u8 *payload) +{ + __u8 err = payload[3]; + __u8 cmd = payload[2]; + + handler_keys(wdata, payload); + + if (err) + hid_warn(wdata->hdev, "Remote error %hhu on req %hhu\n", err, + cmd); +} + struct wiiproto_handler { __u8 id; size_t size; @@ -320,6 +342,8 @@ struct wiiproto_handler { }; static struct wiiproto_handler handlers[] = { + { .id = WIIPROTO_REQ_STATUS, .size = 6, .func = handler_status }, + { .id = WIIPROTO_REQ_RETURN, .size = 4, .func = handler_return }, { .id = WIIPROTO_REQ_DRM_K, .size = 2, .func = handler_keys }, { .id = 0 } }; -- cgit v1.2.3 From 675c1aa3c4a7290e537e854d0af7cdf9692bd396 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 12:36:28 +0200 Subject: ALSA: hda - Fix output-path initialization for Realtek auto-parser When the headphone or speaker output has no own DAC, initialize the path using the primary DAC. Otherwise the path won't be set properly and can result in the silence. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index fcb11af9ad24..0fefc1088d11 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -3083,16 +3083,22 @@ static void alc_auto_init_multi_out(struct hda_codec *codec) static void alc_auto_init_extra_out(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - hda_nid_t pin; + hda_nid_t pin, dac; pin = spec->autocfg.hp_pins[0]; - if (pin) - alc_auto_set_output_and_unmute(codec, pin, PIN_HP, - spec->multiout.hp_nid); + if (pin) { + dac = spec->multiout.hp_nid; + if (!dac) + dac = spec->multiout.dac_nids[0]; + alc_auto_set_output_and_unmute(codec, pin, PIN_HP, dac); + } pin = spec->autocfg.speaker_pins[0]; - if (pin) - alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, - spec->multiout.extra_out_nid[0]); + if (pin) { + dac = spec->multiout.extra_out_nid[0]; + if (!dac) + dac = spec->multiout.dac_nids[0]; + alc_auto_set_output_and_unmute(codec, pin, PIN_OUT, dac); + } } /* -- cgit v1.2.3 From 3c715a98844f72cec0fa3ef2b68232b8f751468b Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 12:41:09 +0200 Subject: ALSA: hda - Update jack-sense info even when no automute is set The internal states, jack_present and line_jack_present should be updated upon unsolicited events even if no automute is set. Otherwise the wrong state is referred when the automute behavior is changed by the mixer control. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 0fefc1088d11..7cabd7317163 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -565,11 +565,11 @@ static void alc_hp_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->automute) - return; spec->jack_present = detect_jacks(codec, ARRAY_SIZE(spec->autocfg.hp_pins), spec->autocfg.hp_pins); + if (!spec->automute) + return; update_speakers(codec); } @@ -578,11 +578,11 @@ static void alc_line_automute(struct hda_codec *codec) { struct alc_spec *spec = codec->spec; - if (!spec->automute || !spec->detect_line) - return; spec->line_jack_present = detect_jacks(codec, ARRAY_SIZE(spec->autocfg.line_out_pins), spec->autocfg.line_out_pins); + if (!spec->automute || !spec->detect_line) + return; update_speakers(codec); } -- cgit v1.2.3 From f2b60717e692550bf753a5d64a5b69ea430fc832 Mon Sep 17 00:00:00 2001 From: Thomas Reim Date: Wed, 17 Aug 2011 09:03:32 +0000 Subject: drm/radeon: Extended DDC Probing for Toshiba L300D Radeon Mobility X1100 HDMI-A Connector Toshiba Satellite L300D with ATI Mobility Radeon X1100 sends data to i2c bus for a HDMI connector that is not implemented/existent on the notebook's board. Fix by applying extented DDC probing for this connector. Requires [PATCH] drm/radeon: Extended DDC Probing for Connectors with Improperly Wired DDC Lines Tested for kernel 2.6.38 on Toshiba Satellite L300D notebook BugLink: http://bugs.launchpad.net/bugs/826677 Signed-off-by: Thomas Reim Acked-by: Chris Routh Cc: Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_connectors.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 7f65940f918f..4f0c1ecac72e 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -466,6 +466,16 @@ static bool radeon_connector_needs_extended_probe(struct radeon_device *dev, (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) return true; } + /* TOSHIBA Satellite L300D with ATI Mobility Radeon x1100 + * (RS690M) sends data to i2c bus for a HDMI connector that + * is not implemented */ + if ((dev->pdev->device == 0x791f) && + (dev->pdev->subsystem_vendor == 0x1179) && + (dev->pdev->subsystem_device == 0xff68)) { + if ((connector_type == DRM_MODE_CONNECTOR_HDMIA) && + (supported_device == ATOM_DEVICE_DFP2_SUPPORT)) + return true; + } /* Default: no EDID header probe required for DDC probing */ return false; -- cgit v1.2.3 From 5dc06c5a70b79a323152bec7e55783e705767e63 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:49:55 +0200 Subject: block: remove READ_META and WRITE_META Replace all occurnanced of the undocumented READ_META with READ | REQ_META and remove the unused WRITE_META define. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- fs/ext3/inode.c | 4 ++-- fs/ext3/namei.c | 2 +- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/fs.h | 2 -- 6 files changed, 7 insertions(+), 9 deletions(-) diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 04da6acde85d..bba7b96e0d9b 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2807,7 +2807,7 @@ make_io: trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 5571708b6a58..c7d4032aa82a 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -922,7 +922,7 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c4da98a959ae..1dfa18feeb3e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -650,7 +650,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -3301,7 +3301,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ_META, bh); + submit_bh(READ | REQ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index f8068c7bae9f..d36315ae629e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -922,7 +922,7 @@ restart: bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 42e8d23bc047..053434049dbb 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -709,7 +709,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ_META, 1, &bh); + ll_rw_block(READ | REQ_META, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 178cdb4f1d4a..eae44c981173 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -162,10 +162,8 @@ struct inodes_stat_t { #define READA RWA_MASK #define READ_SYNC (READ | REQ_SYNC) -#define READ_META (READ | REQ_META) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_META (WRITE | REQ_META) #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) -- cgit v1.2.3 From 65299a3b788bd274bed92f9fa3232082c9f3ea70 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 23 Aug 2011 14:50:29 +0200 Subject: block: separate priority boosting from REQ_META Add a new REQ_PRIO to let requests preempt others in the cfq I/O schedule, and lave REQ_META purely for marking requests as metadata in blktrace. All existing callers of REQ_META except for XFS are updated to also set REQ_PRIO for now. Signed-off-by: Christoph Hellwig Reviewed-by: Namhyung Kim Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 20 ++++++++++---------- drivers/mmc/card/block.c | 3 +++ fs/ext3/inode.c | 4 ++-- fs/ext3/namei.c | 3 ++- fs/ext4/inode.c | 4 ++-- fs/ext4/namei.c | 3 ++- fs/gfs2/log.c | 4 ++-- fs/gfs2/meta_io.c | 6 +++--- fs/gfs2/ops_fstype.c | 2 +- fs/gfs2/quota.c | 2 +- include/linux/blk_types.h | 6 ++++-- 11 files changed, 32 insertions(+), 25 deletions(-) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index a33bd4377c61..16ace89613bc 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,8 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; - /* pending metadata requests */ - int meta_pending; + /* pending priority requests */ + int prio_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -684,8 +684,8 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; - if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_META) - return rq1->cmd_flags & REQ_META ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) + return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1612,9 +1612,9 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq->cmd_flags & REQ_META) { - WARN_ON(!cfqq->meta_pending); - cfqq->meta_pending--; + if (rq->cmd_flags & REQ_PRIO) { + WARN_ON(!cfqq->prio_pending); + cfqq->prio_pending--; } } @@ -3372,7 +3372,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. */ - if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) return true; /* @@ -3439,8 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq->cmd_flags & REQ_META) - cfqq->meta_pending++; + if (rq->cmd_flags & REQ_PRIO) + cfqq->prio_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 1ff5486213fb..4c1a648d00fc 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -926,6 +926,9 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, /* * Reliable writes are used to implement Forced Unit Access and * REQ_META accesses, and are supported only on MMCs. + * + * XXX: this really needs a good explanation of why REQ_META + * is treated special. */ bool do_rel_wr = ((req->cmd_flags & REQ_FUA) || (req->cmd_flags & REQ_META)) && diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index bba7b96e0d9b..12661e1deedd 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1134,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2807,7 +2807,7 @@ make_io: trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ext3_error(inode->i_sb, "ext3_get_inode_loc", diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index c7d4032aa82a..0629e09f6511 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -922,7 +922,8 @@ restart: bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1dfa18feeb3e..c7cbb3d85d9e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -650,7 +650,7 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, return bh; if (buffer_uptodate(bh)) return bh; - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -3301,7 +3301,7 @@ make_io: trace_ext4_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(READ | REQ_META, bh); + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { EXT4_ERROR_INODE_BLOCK(inode, block, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d36315ae629e..1c924faeb6c8 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -922,7 +922,8 @@ restart: bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, + 1, &bh); } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 85c62923ee29..598646434362 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) bh->b_end_io = end_buffer_write_sync; get_bh(bh); if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) - submit_bh(WRITE_SYNC | REQ_META, bh); + submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh); else - submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); + submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 747238cd9f96..be29858900f6 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb { struct buffer_head *bh, *head; int nr_underway = 0; - int write_op = REQ_META | + int write_op = REQ_META | REQ_PRIO | (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); BUG_ON(!PageLocked(page)); @@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, } bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ_SYNC | REQ_META, bh); + submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh); if (!(flags & DIO_WAIT)) return 0; @@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 3bc073a4cf82..079587e53849 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent) bio->bi_end_io = end_bio_io_page; bio->bi_private = page; - submit_bio(READ_SYNC | REQ_META, bio); + submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio); wait_on_page_locked(page); bio_put(bio); if (!PageUptodate(page)) { diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 053434049dbb..0e8bb13381e4 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -709,7 +709,7 @@ get_a_page: set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(READ | REQ_META, 1, &bh); + ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32f0076e844b..71fc53bb8f1c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -124,6 +124,7 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ __REQ_DISCARD, /* request to discard sectors */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ @@ -161,14 +162,15 @@ enum rq_flag_bits { #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1 << __REQ_SYNC) #define REQ_META (1 << __REQ_META) +#define REQ_PRIO (1 << __REQ_PRIO) #define REQ_DISCARD (1 << __REQ_DISCARD) #define REQ_NOIDLE (1 << __REQ_NOIDLE) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ - REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ + REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) #define REQ_CLONE_MASK REQ_COMMON_MASK #define REQ_RAHEAD (1 << __REQ_RAHEAD) -- cgit v1.2.3 From 1f015f5fdc4003f3f2a7c66efdb1acf7a2d230bf Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 23 Aug 2011 14:57:08 +0200 Subject: ALSA: hda - Fix double-headphone/speaker paths for Cxt auto-parser When multiple headphones or speakers are assigned but no individual DACs are available, the driver should take the first HP/SPK DAC instead of another primary output. The patch adds a bit-flag to dac field of struct pin_dac_pair indicating that it's a slave DAC. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 502fc9499453..4c462c3d6462 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3348,6 +3348,8 @@ static hda_nid_t get_unassigned_dac(struct hda_codec *codec, hda_nid_t pin, #define MAX_AUTO_DACS 5 +#define DAC_SLAVE_FLAG 0x8000 /* filled dac is a slave */ + /* fill analog DAC list from the widget tree */ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs) { @@ -3379,6 +3381,8 @@ static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins, filled[nums].pin = pins[i]; filled[nums].type = type; filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest); + if (!filled[nums].dac && i > 0 && filled[0].dac) + filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG; nums++; } return nums; @@ -3407,7 +3411,7 @@ static void cx_auto_parse_output(struct hda_codec *codec) /* fill multiout struct */ for (i = 0; i < nums; i++) { hda_nid_t dac = spec->dac_info[i].dac; - if (!dac) + if (!dac || (dac & DAC_SLAVE_FLAG)) continue; switch (spec->dac_info[i].type) { case AUTO_PIN_LINE_OUT: @@ -4035,6 +4039,8 @@ static void cx_auto_init_output(struct hda_codec *codec) nid = spec->dac_info[i].dac; if (!nid) nid = spec->multiout.dac_nids[0]; + else if (nid & DAC_SLAVE_FLAG) + nid &= ~DAC_SLAVE_FLAG; select_connection(codec, spec->dac_info[i].pin, nid); } if (spec->auto_mute) { @@ -4191,7 +4197,8 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) for (i = 0; i < spec->dac_info_filled; i++) { const char *label; int idx, type; - if (!spec->dac_info[i].dac) + hda_nid_t dac = spec->dac_info[i].dac; + if (!dac || (dac & DAC_SLAVE_FLAG)) continue; type = spec->dac_info[i].type; if (type == AUTO_PIN_LINE_OUT) @@ -4211,7 +4218,7 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) idx = num_spk++; break; } - err = try_add_pb_volume(codec, spec->dac_info[i].dac, + err = try_add_pb_volume(codec, dac, spec->dac_info[i].pin, label, idx); if (err < 0) -- cgit v1.2.3 From 8c4074cd2254606aeb788d518ccc27c9f97129e1 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 1 Aug 2011 21:20:10 +0800 Subject: tty: Add "spi:" prefix for spi modalias Since commit e0626e38 (spi: prefix modalias with "spi:"), the spi modalias is prefixed with "spi:". This patch adds "spi:" prefix and removes "-spi" suffix in the modalias. Signed-off-by: Axel Lin Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max3107-aava.c | 2 +- drivers/tty/serial/max3107.c | 2 +- drivers/tty/serial/mrst_max3110.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/max3107-aava.c b/drivers/tty/serial/max3107-aava.c index a1fe304f2f52..d73aadd7a9ad 100644 --- a/drivers/tty/serial/max3107-aava.c +++ b/drivers/tty/serial/max3107-aava.c @@ -340,5 +340,5 @@ module_exit(max3107_exit); MODULE_DESCRIPTION("MAX3107 driver"); MODULE_AUTHOR("Aavamobile"); -MODULE_ALIAS("aava-max3107-spi"); +MODULE_ALIAS("spi:aava-max3107"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/tty/serial/max3107.c b/drivers/tty/serial/max3107.c index 750b4f627315..a8164601c0ea 100644 --- a/drivers/tty/serial/max3107.c +++ b/drivers/tty/serial/max3107.c @@ -1209,5 +1209,5 @@ module_exit(max3107_exit); MODULE_DESCRIPTION("MAX3107 driver"); MODULE_AUTHOR("Aavamobile"); -MODULE_ALIAS("max3107-spi"); +MODULE_ALIAS("spi:max3107"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/tty/serial/mrst_max3110.c b/drivers/tty/serial/mrst_max3110.c index a764bf99743b..23bc743f2a22 100644 --- a/drivers/tty/serial/mrst_max3110.c +++ b/drivers/tty/serial/mrst_max3110.c @@ -917,4 +917,4 @@ module_init(serial_m3110_init); module_exit(serial_m3110_exit); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("max3110-uart"); +MODULE_ALIAS("spi:max3110-uart"); -- cgit v1.2.3 From 44178176ecc55ad370b837dd2c4b4b8bed1e3823 Mon Sep 17 00:00:00 2001 From: Eric Smith Date: Mon, 11 Jul 2011 22:53:13 -0600 Subject: 8250_pci: add support for Rosewill RC-305 4x serial port card This patch adds support for the Rosewill RC-305 four-port PCI serial card, and probably any other four-port serial cards based on the Moschip MCS9865 chip, assuming that the EEPROM on the card was programmed in accordance with Table 6 of the MCS9865 EEPROM Application Note version 0.3 dated 16-May-2008, available from the Moschip web site (registration required). This patch is based on an earlier patch [1] for the SYBA 6x serial port card by Ira W. Snyder. [1]: http://www.gossamer-threads.com/lists/linux/kernel/1162435 Signed-off-by: Eric Smith Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pci.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c index 6b887d90a205..f652a7b8913b 100644 --- a/drivers/tty/serial/8250_pci.c +++ b/drivers/tty/serial/8250_pci.c @@ -4021,13 +4021,17 @@ static struct pci_device_id serial_pci_tbl[] = { 0, 0, pbn_NETMOS9900_2s_115200 }, /* - * Best Connectivity PCI Multi I/O cards + * Best Connectivity and Rosewill PCI Multi I/O cards */ { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, 0xA000, 0x1000, 0, 0, pbn_b0_1_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, + 0xA000, 0x3002, + 0, 0, pbn_b0_bt_2_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9865, 0xA000, 0x3004, 0, 0, pbn_b0_bt_4_115200 }, -- cgit v1.2.3 From dacacc3e794c4c5bab05d97afc19e372e1877943 Mon Sep 17 00:00:00 2001 From: Tomoya MORINAGA Date: Tue, 12 Jul 2011 16:08:49 +0900 Subject: serial/8250_pci: delete duplicate data definition Data definiton "VendorID=10DB, device_id=800D" is already defined. This patch deletes the duplicate definition. Signed-off-by: Tomoya MORINAGA Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250_pci.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/tty/serial/8250_pci.c b/drivers/tty/serial/8250_pci.c index f652a7b8913b..3abeca2a2a1b 100644 --- a/drivers/tty/serial/8250_pci.c +++ b/drivers/tty/serial/8250_pci.c @@ -1599,11 +1599,6 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .device = 0x800D, .init = pci_eg20t_init, }, - { - .vendor = 0x10DB, - .device = 0x800D, - .init = pci_eg20t_init, - }, /* * Cronyx Omega PCI (PLX-chip based) */ -- cgit v1.2.3 From dbb3b1ca5609d1f3848cd387d06cc60aaacf7f98 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Mon, 25 Jul 2011 16:19:52 -0400 Subject: 8250: Fix race condition in serial8250_backup_timeout(). This is to fix an issue where output will suddenly become very slow. The problem occurs on 8250 UARTS with the hardware bug UART_BUG_THRE. BACKGROUND For normal UARTs (without UART_BUG_THRE): When the serial core layer gets new transmit data and the transmitter is idle, it buffers the data and calls the 8250s' serial8250_start_tx() routine which will simply enable the TX interrupt in the IER register and return. This should immediately fire a THRE interrupt and begin transmitting the data. For buggy UARTs (with UART_BUG_THRE): merely enabling the TX interrupt in IER does not necessarily generate a new THRE interrupt. Therefore, a background timer periodically checks to see if there is pending data, and starts transmission if that is the case. The bug happens on SMP systems when the system has nothing to transmit, the transmit interrupt is disabled and the following sequence occurs: - CPU0: The background timer routine serial8250_backup_timeout() starts and saves the state of the interrupt enable register (IER) and then disables all interrupts in IER. NOTE: The transmit interrupt (TI) bit is saved as disabled. - CPU1: The serial core gets data to transmit, grabs the port lock and calls serial8250_start_tx() which enables the TI in IER. - CPU0: serial8250_backup_timeout() waits for the port lock. - CPU1: finishes (with TI enabled) and releases the port lock. - CPU0: serial8250_backup_timeout() calls the interrupt routine which will transmit the next fifo's worth of data and then restores the IER from the previously saved value (TI disabled). At this point, as long as the serial core has more transmit data buffered, it will not call serial8250_start_tx() again and the background timer routine will slowly transmit the data. The fix is to have serial8250_start_tx() get the port lock before it saves the IER state and release it after restoring IER. This will prevent serial8250_start_tx() from running in parallel. Signed-off-by: Al Cooper Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/8250.c b/drivers/tty/serial/8250.c index f2dfec82faf8..7f50999eebc2 100644 --- a/drivers/tty/serial/8250.c +++ b/drivers/tty/serial/8250.c @@ -1819,6 +1819,8 @@ static void serial8250_backup_timeout(unsigned long data) unsigned int iir, ier = 0, lsr; unsigned long flags; + spin_lock_irqsave(&up->port.lock, flags); + /* * Must disable interrupts or else we risk racing with the interrupt * based handler. @@ -1836,10 +1838,8 @@ static void serial8250_backup_timeout(unsigned long data) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ - spin_lock_irqsave(&up->port.lock, flags); lsr = serial_in(up, UART_LSR); up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - spin_unlock_irqrestore(&up->port.lock, flags); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && (lsr & UART_LSR_THRE)) { @@ -1848,11 +1848,13 @@ static void serial8250_backup_timeout(unsigned long data) } if (!(iir & UART_IIR_NO_INT)) - serial8250_handle_port(up); + transmit_chars(up); if (is_real_interrupt(up->port.irq)) serial_out(up, UART_IER, ier); + spin_unlock_irqrestore(&up->port.lock, flags); + /* Standard timer interval plus 0.2s to keep the port running */ mod_timer(&up->timer, jiffies + uart_poll_timeout(&up->port) + HZ / 5); -- cgit v1.2.3 From 24d406a6bf736f7aebdc8fa0f0ec86e0890c6d24 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Aug 2011 14:59:28 +0200 Subject: TTY: pty, fix pty counting tty_operations->remove is normally called like: queue_release_one_tty ->tty_shutdown ->tty_driver_remove_tty ->tty_operations->remove However tty_shutdown() is called from queue_release_one_tty() only if tty_operations->shutdown is NULL. But for pty, it is not. pty_unix98_shutdown() is used there as ->shutdown. So tty_operations->remove of pty (i.e. pty_unix98_remove()) is never called. This results in invalid pty_count. I.e. what can be seen in /proc/sys/kernel/pty/nr. I see this was already reported at: https://lkml.org/lkml/2009/11/5/370 But it was not fixed since then. This patch is kind of a hackish way. The problem lies in ->install. We allocate there another tty (so-called tty->link). So ->install is called once, but ->remove twice, for both tty and tty->link. The fix here is to count both tty and tty->link and divide the count by 2 for user. And to have ->remove called, let's make tty_driver_remove_tty() global and call that from pty_unix98_shutdown() (tty_operations->shutdown). While at it, let's document that when ->shutdown is defined, tty_shutdown() is not called. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: "H. Peter Anvin" Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/pty.c | 17 +++++++++++++++-- drivers/tty/tty_io.c | 3 +-- include/linux/tty.h | 2 ++ include/linux/tty_driver.h | 3 +++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 98b6e3bdb000..e809e9d4683c 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -446,8 +446,19 @@ static inline void legacy_pty_init(void) { } int pty_limit = NR_UNIX98_PTY_DEFAULT; static int pty_limit_min; static int pty_limit_max = NR_UNIX98_PTY_MAX; +static int tty_count; static int pty_count; +static inline void pty_inc_count(void) +{ + pty_count = (++tty_count) / 2; +} + +static inline void pty_dec_count(void) +{ + pty_count = (--tty_count) / 2; +} + static struct cdev ptmx_cdev; static struct ctl_table pty_table[] = { @@ -542,6 +553,7 @@ static struct tty_struct *pts_unix98_lookup(struct tty_driver *driver, static void pty_unix98_shutdown(struct tty_struct *tty) { + tty_driver_remove_tty(tty->driver, tty); /* We have our own method as we don't use the tty index */ kfree(tty->termios); } @@ -588,7 +600,8 @@ static int pty_unix98_install(struct tty_driver *driver, struct tty_struct *tty) */ tty_driver_kref_get(driver); tty->count++; - pty_count++; + pty_inc_count(); /* tty */ + pty_inc_count(); /* tty->link */ return 0; err_free_mem: deinitialize_tty_struct(o_tty); @@ -602,7 +615,7 @@ err_free_tty: static void pty_unix98_remove(struct tty_driver *driver, struct tty_struct *tty) { - pty_count--; + pty_dec_count(); } static const struct tty_operations ptm_unix98_ops = { diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 150e4f747c7d..4f1fc81112e6 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1295,8 +1295,7 @@ static int tty_driver_install_tty(struct tty_driver *driver, * * Locking: tty_mutex for now */ -static void tty_driver_remove_tty(struct tty_driver *driver, - struct tty_struct *tty) +void tty_driver_remove_tty(struct tty_driver *driver, struct tty_struct *tty) { if (driver->ops->remove) driver->ops->remove(driver, tty); diff --git a/include/linux/tty.h b/include/linux/tty.h index 44bc0c5617e1..5f2ede82b3d6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -421,6 +421,8 @@ extern void tty_driver_flush_buffer(struct tty_struct *tty); extern void tty_throttle(struct tty_struct *tty); extern void tty_unthrottle(struct tty_struct *tty); extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); +extern void tty_driver_remove_tty(struct tty_driver *driver, + struct tty_struct *tty); extern void tty_shutdown(struct tty_struct *tty); extern void tty_free_termios(struct tty_struct *tty); extern int is_current_pgrp_orphaned(void); diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 9deeac855240..ecdaeb98b293 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -47,6 +47,9 @@ * * This routine is called synchronously when a particular tty device * is closed for the last time freeing up the resources. + * Note that tty_shutdown() is not called if ops->shutdown is defined. + * This means one is responsible to take care of calling ops->remove (e.g. + * via tty_driver_remove_tty) and releasing tty->termios. * * * void (*cleanup)(struct tty_struct * tty); -- cgit v1.2.3 From 0055197e984e5fbe6f48f37fc50dd30254915493 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 17 Aug 2011 13:48:15 +0200 Subject: TTY: serial, document ignoring of uart->ops->startup error When a user has SYS_ADMIN capabilities and uart->ops->startup returns an error in uart_startup, we silently drop the error. We then return 0 and behave as if it didn't fail. (Not quite, since we set TTY_IO_ERROR bit and leave ASYNC_INITIALIZED bit cleared.) This all is to allow setserial to work with improperly configured or unconfigured ports. User can thus set port properties and reconfigure properly. This patch only documents this behavior. Signed-off-by: Jiri Slaby Cc: Alan Cox Cc: Russel King Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/serial_core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index db7912cb7ae0..a3efbea5dbba 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -200,6 +200,11 @@ static int uart_startup(struct tty_struct *tty, struct uart_state *state, int in clear_bit(TTY_IO_ERROR, &tty->flags); } + /* + * This is to allow setserial on this port. People may want to set + * port/irq/type and then reconfigure the port properly if it failed + * now. + */ if (retval && capable(CAP_SYS_ADMIN)) retval = 0; -- cgit v1.2.3 From 69dd3d8e29e294caaf63eb5e8a72d250279f9e5f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 23 Aug 2011 10:36:51 -0700 Subject: Revert "irq: Always set IRQF_ONESHOT if no primary handler is specified" This reverts commit f3637a5f2e2eb391ff5757bc83fb5de8f9726464. It turns out that this breaks several drivers, one example being OMAP boards which use the on-board OMAP UARTs and the omap-serial driver that will not boot to userspace after the commit. Paul Walmsley reports that enabling CONFIG_DEBUG_SHIRQ reveals 'IRQ handler type mismatch' errors: IRQ handler type mismatch for IRQ 74 current handler: serial idle ... and the reason is that setting IRQF_ONESHOT will now result in those interrupt handlers having different IRQF flags, and thus being unsharable. So the commit log in the reverted commit: "Since it is required for those users and there is no difference for others it makes sense to add this flag unconditionally." is simply not true: there may not be any difference from a "actions at irq time", but there is a *big* difference wrt this flag testing irq management (see __setup_irq() in kernel/irq/manage.c). One solution may be to stop verifying IRQF_ONESHOT in __setup_irq(), but right now the safe course of action is to revert the change. Let's revisit this in a later merge window. Reported-by: Paul Walmsley Cc: Sebastian Andrzej Siewior Requested-by: Alan Cox Acked-by: Thomas Gleixner Signed-off-by: Linus Torvalds --- kernel/irq/manage.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 2e9425889fa8..9b956fa20308 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1331,7 +1331,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, if (!thread_fn) return -EINVAL; handler = irq_default_primary_handler; - irqflags |= IRQF_ONESHOT; } action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); -- cgit v1.2.3 From b280a97d1caf6fe1d38b51ebb31219391f5ad1a0 Mon Sep 17 00:00:00 2001 From: Nick Pelly Date: Fri, 15 Jul 2011 13:53:08 -0700 Subject: omap-serial: Allow IXON and IXOFF to be disabled. Fixes logic bug that software flow control cannot be disabled, because serial_omap_configure_xonxoff() is not called if both IXON and IXOFF bits are cleared. Signed-off-by: Nick Pelly Acked-by: Govindraj.R Tested-by: Govindraj.R Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/omap-serial.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c index c37df8d0fa28..5e713d3ef1f4 100644 --- a/drivers/tty/serial/omap-serial.c +++ b/drivers/tty/serial/omap-serial.c @@ -806,8 +806,7 @@ serial_omap_set_termios(struct uart_port *port, struct ktermios *termios, serial_omap_set_mctrl(&up->port, up->port.mctrl); /* Software Flow Control Configuration */ - if (termios->c_iflag & (IXON | IXOFF)) - serial_omap_configure_xonxoff(up, termios); + serial_omap_configure_xonxoff(up, termios); spin_unlock_irqrestore(&up->port.lock, flags); dev_dbg(up->port.dev, "serial_omap_set_termios+%d\n", up->pdev->id); -- cgit v1.2.3 From 4b723a471050a8b80f7fa86e76f01f4c711b3443 Mon Sep 17 00:00:00 2001 From: srinidhi kasagar Date: Tue, 9 Aug 2011 20:17:22 +0200 Subject: i2c-nomadik: Do not use _interruptible_ variant call If there is a signal pending and wait_for_completion_interruptible_timeout exited because of the -ERESTARTSYS error we are unable to send any more i2c messages. So, deprecate this _interruptible_ variant call. Signed-off-by: Srinidhi Kasagar Signed-off-by: Linus Walleij Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-nomadik.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index 0c731ca69f15..f9b8854fe0a5 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -417,12 +417,12 @@ static int read_i2c(struct nmk_i2c_dev *dev) writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask, dev->virtbase + I2C_IMSCR); - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_timeout( &dev->xfer_complete, dev->adap.timeout); if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_interruptible_timeout" + "wait_for_completion_timeout" "returned %d waiting for event\n", timeout); status = timeout; } @@ -504,12 +504,12 @@ static int write_i2c(struct nmk_i2c_dev *dev) writel(readl(dev->virtbase + I2C_IMSCR) | irq_mask, dev->virtbase + I2C_IMSCR); - timeout = wait_for_completion_interruptible_timeout( + timeout = wait_for_completion_timeout( &dev->xfer_complete, dev->adap.timeout); if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_interruptible_timeout" + "wait_for_completion_timeout" "returned %d waiting for event\n", timeout); status = timeout; } -- cgit v1.2.3 From 584b408d37af4e0b38ad5b60f236381bcdf396bc Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 4 Aug 2011 07:53:02 -0700 Subject: Revert "i2c-omap: fix static suspend vs. runtime suspend" This reverts commit adf6e07922255937c8bfeea777d19502b4c9a2be. Remove system PM methods which can race with runtime PM methods. Also, as of v3.1, the PM domain level code for OMAP handles device power state transistions automatically for devices, so drivers no longer need to specifically call the bus/pm_domain methods themselves. Signed-off-by: Kevin Hilman Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-omap.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c index 1a766cf74f6b..2dfb63176856 100644 --- a/drivers/i2c/busses/i2c-omap.c +++ b/drivers/i2c/busses/i2c-omap.c @@ -1139,41 +1139,12 @@ omap_i2c_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_SUSPEND -static int omap_i2c_suspend(struct device *dev) -{ - if (!pm_runtime_suspended(dev)) - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_suspend) - dev->bus->pm->runtime_suspend(dev); - - return 0; -} - -static int omap_i2c_resume(struct device *dev) -{ - if (!pm_runtime_suspended(dev)) - if (dev->bus && dev->bus->pm && dev->bus->pm->runtime_resume) - dev->bus->pm->runtime_resume(dev); - - return 0; -} - -static struct dev_pm_ops omap_i2c_pm_ops = { - .suspend = omap_i2c_suspend, - .resume = omap_i2c_resume, -}; -#define OMAP_I2C_PM_OPS (&omap_i2c_pm_ops) -#else -#define OMAP_I2C_PM_OPS NULL -#endif - static struct platform_driver omap_i2c_driver = { .probe = omap_i2c_probe, .remove = omap_i2c_remove, .driver = { .name = "omap_i2c", .owner = THIS_MODULE, - .pm = OMAP_I2C_PM_OPS, }, }; -- cgit v1.2.3 From 80900d0140a7648587982c8f299830e900e49165 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Mon, 22 Aug 2011 23:19:48 +0300 Subject: wl12xx: Remove obsolete testmode NVS push command The testmode NVS push command is no longer in use. In addition, it has several implementation issues that prevent it from working correctly: 1. wl1271_tm_cmd_configure relies on wl->chip.id being set. However, since the device was not necessarily booted by the time the function is called, wl->chip.id will be initialized to 0. 2. The NVS file is fetched by calling request_firmware() before it is possible to push an NVS file. 3. The maximum allowed size of nl binary payloads is not sufficient for pushing NVS files. 4. Pushing 128x NVS files will always fail due to a bug in the validation code. 5. In case the pushed NVS file is found invalid, the mutex will be kept locked and the nvs member will become a dangling pointer. Since this feature is not being used, remove it completely instead of fixing it. Signed-off-by: Ido Yariv Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/testmode.c | 45 ---------------------------------- 1 file changed, 45 deletions(-) diff --git a/drivers/net/wireless/wl12xx/testmode.c b/drivers/net/wireless/wl12xx/testmode.c index 88add68bd9ac..4ae8effaee22 100644 --- a/drivers/net/wireless/wl12xx/testmode.c +++ b/drivers/net/wireless/wl12xx/testmode.c @@ -36,7 +36,6 @@ enum wl1271_tm_commands { WL1271_TM_CMD_TEST, WL1271_TM_CMD_INTERROGATE, WL1271_TM_CMD_CONFIGURE, - WL1271_TM_CMD_NVS_PUSH, WL1271_TM_CMD_SET_PLT_MODE, WL1271_TM_CMD_RECOVER, @@ -190,48 +189,6 @@ static int wl1271_tm_cmd_configure(struct wl1271 *wl, struct nlattr *tb[]) return 0; } -static int wl1271_tm_cmd_nvs_push(struct wl1271 *wl, struct nlattr *tb[]) -{ - int ret = 0; - size_t len; - void *buf; - - wl1271_debug(DEBUG_TESTMODE, "testmode cmd nvs push"); - - if (!tb[WL1271_TM_ATTR_DATA]) - return -EINVAL; - - buf = nla_data(tb[WL1271_TM_ATTR_DATA]); - len = nla_len(tb[WL1271_TM_ATTR_DATA]); - - mutex_lock(&wl->mutex); - - kfree(wl->nvs); - - if ((wl->chip.id == CHIP_ID_1283_PG20) && - (len != sizeof(struct wl128x_nvs_file))) - return -EINVAL; - else if (len != sizeof(struct wl1271_nvs_file)) - return -EINVAL; - - wl->nvs = kzalloc(len, GFP_KERNEL); - if (!wl->nvs) { - wl1271_error("could not allocate memory for the nvs file"); - ret = -ENOMEM; - goto out; - } - - memcpy(wl->nvs, buf, len); - wl->nvs_len = len; - - wl1271_debug(DEBUG_TESTMODE, "testmode pushed nvs"); - -out: - mutex_unlock(&wl->mutex); - - return ret; -} - static int wl1271_tm_cmd_set_plt_mode(struct wl1271 *wl, struct nlattr *tb[]) { u32 val; @@ -288,8 +245,6 @@ int wl1271_tm_cmd(struct ieee80211_hw *hw, void *data, int len) return wl1271_tm_cmd_interrogate(wl, tb); case WL1271_TM_CMD_CONFIGURE: return wl1271_tm_cmd_configure(wl, tb); - case WL1271_TM_CMD_NVS_PUSH: - return wl1271_tm_cmd_nvs_push(wl, tb); case WL1271_TM_CMD_SET_PLT_MODE: return wl1271_tm_cmd_set_plt_mode(wl, tb); case WL1271_TM_CMD_RECOVER: -- cgit v1.2.3 From a15f1c45f393982196c981a8df8b534cc9f3bb80 Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Mon, 22 Aug 2011 23:19:49 +0300 Subject: wl12xx: Fix validation of pm_runtime_get_sync return value wl1271_sdio_power_on checks if the return value of pm_runtime_get_sync is non-zero, and if so bails out. However, pm_runtime_get_sync can return a positive number which does not suggest an error has occurred. This is problematic for two reasons: 1. The function will needlessly bail out without decrementing back the runtime PM reference counter. 2. wl1271_power_on only checks if wl1271_power_on return value is negative. This means that wl1271_power_on will continue even if wl1271_sdio_power_on bailed out. As a result, sdio transactions will be initiated without properly enabling the sdio function and claiming the host. This could even lead to a kernel panic. Fix this by only checking that the return value of pm_runtime_get_sync is non-negative. Signed-off-by: Ido Yariv Acked-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/sdio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/wl12xx/sdio.c b/drivers/net/wireless/wl12xx/sdio.c index 5cf18c2c23f0..fb1fd5af75ea 100644 --- a/drivers/net/wireless/wl12xx/sdio.c +++ b/drivers/net/wireless/wl12xx/sdio.c @@ -164,7 +164,7 @@ static int wl1271_sdio_power_on(struct wl1271 *wl) /* If enabled, tell runtime PM not to power off the card */ if (pm_runtime_enabled(&func->dev)) { ret = pm_runtime_get_sync(&func->dev); - if (ret) + if (ret < 0) goto out; } else { /* Runtime PM is disabled: power up the card manually */ -- cgit v1.2.3 From 7a5e4877c14de0827dbda8efa5080089757a8733 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Tue, 23 Aug 2011 11:42:25 +0300 Subject: wl12xx: add max_sched_scan_ssids value to the hw description After commit 5a865ba, we require a separate value to indicate the number of supported SSIDs in scheduled scans. This patch adds a proper value to the wl12xx driver. This fixes a regression in 3.1-rc3 where scheduled scans were not working properly with the wl12xx driver. Signed-off-by: Luciano Coelho Signed-off-by: John W. Linville --- drivers/net/wireless/wl12xx/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/wl12xx/main.c b/drivers/net/wireless/wl12xx/main.c index e58c22d21e39..b70ae40ad660 100644 --- a/drivers/net/wireless/wl12xx/main.c +++ b/drivers/net/wireless/wl12xx/main.c @@ -4283,6 +4283,7 @@ int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_AP); wl->hw->wiphy->max_scan_ssids = 1; + wl->hw->wiphy->max_sched_scan_ssids = 1; /* * Maximum length of elements in scanning probe request templates * should be the maximum length possible for a template, without -- cgit v1.2.3 From 9818a4775a3ab18b84a689537088b3d72a742130 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Mon, 8 Aug 2011 15:57:45 +0200 Subject: staging: brcm80211: fix compile error on non-x86 archs since 3.0 kernel Since the arrival of kernel version 3.0 in the staging tree it turns out compile error occurs for sparc64, powerpc, and arm platforms. This patch fixes that issue. Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Henry Ptasinski Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/brcm80211/brcmsmac/types.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/brcm80211/brcmsmac/types.h b/drivers/staging/brcm80211/brcmsmac/types.h index bbf21897ae0e..823b5e4672e2 100644 --- a/drivers/staging/brcm80211/brcmsmac/types.h +++ b/drivers/staging/brcm80211/brcmsmac/types.h @@ -18,6 +18,7 @@ #define _BRCM_TYPES_H_ #include +#include /* Bus types */ #define SI_BUS 0 /* SOC Interconnect */ -- cgit v1.2.3 From 20cc7995fe66ce6417678bb0db6b3d4955fb1ff6 Mon Sep 17 00:00:00 2001 From: Pieter-Paul Giesberts Date: Mon, 8 Aug 2011 15:59:03 +0200 Subject: staging: brcm80211: SPARC build error fix Due to missing memset function declaration. Reviewed-by: Roland Vossen Signed-off-by: Arend van Spriel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/brcm80211/brcmsmac/otp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/brcm80211/brcmsmac/otp.c b/drivers/staging/brcm80211/brcmsmac/otp.c index 34253cf37812..4a70180eba5d 100644 --- a/drivers/staging/brcm80211/brcmsmac/otp.c +++ b/drivers/staging/brcm80211/brcmsmac/otp.c @@ -16,6 +16,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From ba8f318471f66d5d5b79da68112525cf432b2b18 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Thu, 18 Aug 2011 09:37:02 +0100 Subject: m68k: fix __page_to_pfn for a const struct page argument Fixes fallout due to the removal of the cast in commit aa462abe8aaf ("mm: fix __page_to_pfn for a const struct page argument") Signed-off-by: Ian Campbell Cc: Andrew Morton Acked-by: Geert Uytterhoeven Cc: linux-m68k@lists.linux-m68k.org Signed-off-by: Linus Torvalds --- arch/m68k/include/asm/page_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 31d5570d6567..89f201434b5a 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -162,7 +162,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void) pgdat->node_mem_map + (__pfn - pgdat->node_start_pfn); \ }) #define page_to_pfn(_page) ({ \ - struct page *__p = (_page); \ + const struct page *__p = (_page); \ struct pglist_data *pgdat; \ pgdat = &pg_data_map[page_to_nid(__p)]; \ ((__p) - pgdat->node_mem_map) + pgdat->node_start_pfn; \ -- cgit v1.2.3 From c5f5c4db393837ebb2ae47bf061d70e498f48f8c Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Wed, 10 Aug 2011 12:56:49 -0500 Subject: staging: zcache: fix crash on high memory swap zcache_put_page() was modified to pass page_address(page) instead of the actual page structure. In combination with the function signature changes to tmem_put() and zcache_pampd_create(), zcache_pampd_create() tries to (re)derive the page structure from the virtual address. However, if the original page is a high memory page (or any unmapped page), this virt_to_page() fails because the page_address() in zcache_put_page() returned NULL. This patch changes zcache_put_page() and zcache_get_page() to pass the page structure instead of the page's virtual address, which may or may not exist. Signed-off-by: Seth Jennings Acked-by: Dan Magenheimer Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/zcache-main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 855a5bb56a47..a3f5162bfedc 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1158,7 +1158,7 @@ static void *zcache_pampd_create(char *data, size_t size, bool raw, int eph, size_t clen; int ret; unsigned long count; - struct page *page = virt_to_page(data); + struct page *page = (struct page *)(data); struct zcache_client *cli = pool->client; uint16_t client_id = get_client_id_from_client(cli); unsigned long zv_mean_zsize; @@ -1227,7 +1227,7 @@ static int zcache_pampd_get_data(char *data, size_t *bufsize, bool raw, int ret = 0; BUG_ON(is_ephemeral(pool)); - zv_decompress(virt_to_page(data), pampd); + zv_decompress((struct page *)(data), pampd); return ret; } @@ -1539,7 +1539,7 @@ static int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp, goto out; if (!zcache_freeze && zcache_do_preload(pool) == 0) { /* preload does preempt_disable on success */ - ret = tmem_put(pool, oidp, index, page_address(page), + ret = tmem_put(pool, oidp, index, (char *)(page), PAGE_SIZE, 0, is_ephemeral(pool)); if (ret < 0) { if (is_ephemeral(pool)) @@ -1572,7 +1572,7 @@ static int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp, pool = zcache_get_pool_by_id(cli_id, pool_id); if (likely(pool != NULL)) { if (atomic_read(&pool->obj_count) > 0) - ret = tmem_get(pool, oidp, index, page_address(page), + ret = tmem_get(pool, oidp, index, (char *)(page), &size, 0, is_ephemeral(pool)); zcache_put_pool(pool); } -- cgit v1.2.3 From 1dcab0875b113a148b6601d87b4e0e3444440339 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 9 Aug 2011 21:01:33 +0300 Subject: Staging: zcache: signedness bug in tmem_get() "ret" needs to be signed for the error handling to work properly. Signed-off-by: Dan Carpenter Acked-by: Dan Magenheimer Signed-off-by: Greg Kroah-Hartman --- drivers/staging/zcache/tmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c index 975e34bcd722..1ca66ea9b281 100644 --- a/drivers/staging/zcache/tmem.c +++ b/drivers/staging/zcache/tmem.c @@ -604,7 +604,7 @@ int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index, struct tmem_obj *obj; void *pampd; bool ephemeral = is_ephemeral(pool); - uint32_t ret = -1; + int ret = -1; struct tmem_hashbucket *hb; bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral); bool lock_held = false; -- cgit v1.2.3 From 048316be72893455f69ad728fa94c26e2e582ba2 Mon Sep 17 00:00:00 2001 From: David Daney Date: Tue, 16 Aug 2011 10:10:56 -0700 Subject: staging: octeon-ethernet: Add missing #includes. I looks like something used to implicitly include linux/interrupt.h, and no longer does. Fix the resulting build error by explicitly including it. Signed-off-by: David Daney Cc: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-rgmii.c | 1 + drivers/staging/octeon/ethernet-spi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c index 9c0d2936e486..c3d73f8431ae 100644 --- a/drivers/staging/octeon/ethernet-rgmii.c +++ b/drivers/staging/octeon/ethernet-rgmii.c @@ -26,6 +26,7 @@ **********************************************************************/ #include #include +#include #include #include #include diff --git a/drivers/staging/octeon/ethernet-spi.c b/drivers/staging/octeon/ethernet-spi.c index 970825421884..d0e2d514968a 100644 --- a/drivers/staging/octeon/ethernet-spi.c +++ b/drivers/staging/octeon/ethernet-spi.c @@ -26,6 +26,7 @@ **********************************************************************/ #include #include +#include #include #include -- cgit v1.2.3 From 7ca0758cdb7c241cb4e0490a8d95f0eb5b861daf Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 22 Aug 2011 13:27:06 -0700 Subject: x86-32, vdso: On system call restart after SYSENTER, use int $0x80 When we enter a 32-bit system call via SYSENTER or SYSCALL, we shuffle the arguments to match the int $0x80 calling convention. This was probably a design mistake, but it's what it is now. This causes errors if the system call as to be restarted. For SYSENTER, we have to invoke the instruction from the vdso as the return address is hardcoded. Accordingly, we can simply replace the jump in the vdso with an int $0x80 instruction and use the slower entry point for a post-restart. Suggested-by: Linus Torvalds Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/CA%2B55aFztZ=r5wa0x26KJQxvZOaQq8s2v3u50wCyJcA-Sc4g8gQ@mail.gmail.com Cc: --- arch/x86/vdso/vdso32/sysenter.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S index e2800affa754..e354bceee0e0 100644 --- a/arch/x86/vdso/vdso32/sysenter.S +++ b/arch/x86/vdso/vdso32/sysenter.S @@ -43,7 +43,7 @@ __kernel_vsyscall: .space 7,0x90 /* 14: System call restart point is here! (SYSENTER_RETURN-2) */ - jmp .Lenter_kernel + int $0x80 /* 16: System call normal return point is here! */ VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */ pop %ebp -- cgit v1.2.3 From 1a878284473284f9577d44babf16d87152a05c33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:16:40 -0700 Subject: [SCSI] isci: fix sata response handling A bug (likely copy/paste) that has been carried from the original implementation. The unsolicited frame handling structure returns the d2h fis in the isci_request.stp.rsp buffer. Cc: Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index a46e07ac789f..b4cf998385b3 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2399,22 +2399,19 @@ static void isci_task_save_for_upper_layer_completion( } } -static void isci_request_process_stp_response(struct sas_task *task, - void *response_buffer) +static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_fis *fis) { - struct dev_to_host_fis *d2h_reg_fis = response_buffer; struct task_status_struct *ts = &task->task_status; struct ata_task_resp *resp = (void *)&ts->buf[0]; - resp->frame_len = le16_to_cpu(*(__le16 *)(response_buffer + 6)); - memcpy(&resp->ending_fis[0], response_buffer + 16, 24); + resp->frame_len = sizeof(*fis); + memcpy(resp->ending_fis, fis, sizeof(*fis)); ts->buf_valid_size = sizeof(*resp); - /** - * If the device fault bit is set in the status register, then + /* If the device fault bit is set in the status register, then * set the sense data and return. */ - if (d2h_reg_fis->status & ATA_DF) + if (fis->status & ATA_DF) ts->stat = SAS_PROTO_RESPONSE; else ts->stat = SAM_STAT_GOOD; @@ -2428,7 +2425,6 @@ static void isci_request_io_request_complete(struct isci_host *ihost, { struct sas_task *task = isci_request_access_task(request); struct ssp_response_iu *resp_iu; - void *resp_buf; unsigned long task_flags; struct isci_remote_device *idev = isci_lookup_device(task->dev); enum service_response response = SAS_TASK_UNDELIVERED; @@ -2565,9 +2561,7 @@ static void isci_request_io_request_complete(struct isci_host *ihost, task); if (sas_protocol_ata(task->task_proto)) { - resp_buf = &request->stp.rsp; - isci_request_process_stp_response(task, - resp_buf); + isci_process_stp_response(task, &request->stp.rsp); } else if (SAS_PROTOCOL_SSP == task->task_proto) { /* crack the iu response buffer. */ -- cgit v1.2.3 From ee33e2b771f9e9e4aaba2bb2ace7b727fe451a8b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:16:45 -0700 Subject: [SCSI] isci: fix 32-bit operation when CONFIG_HIGHMEM64G=n The unsolicited frame control infrastructure requires a table of dma addresses for the hardware to lookup the frame buffer location by an index. The hardware expects the elements of this table to be 64-bit quantities, so we cannot reference these elements as dma_addr_t. All unsolicited frame protocols are affected, particularly SATA-PIO and SMP which prevented direct-attached SATA drives and expander-attached drives to not be discovered. Cc: Reported-by: Jacek Danecki Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/unsolicited_frame_control.c | 2 +- drivers/scsi/isci/unsolicited_frame_control.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/unsolicited_frame_control.c b/drivers/scsi/isci/unsolicited_frame_control.c index e9e1e2abacb9..16f88ab939c8 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.c +++ b/drivers/scsi/isci/unsolicited_frame_control.c @@ -72,7 +72,7 @@ int sci_unsolicited_frame_control_construct(struct isci_host *ihost) */ buf_len = SCU_MAX_UNSOLICITED_FRAMES * SCU_UNSOLICITED_FRAME_BUFFER_SIZE; header_len = SCU_MAX_UNSOLICITED_FRAMES * sizeof(struct scu_unsolicited_frame_header); - size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(dma_addr_t); + size = buf_len + header_len + SCU_MAX_UNSOLICITED_FRAMES * sizeof(uf_control->address_table.array[0]); /* * The Unsolicited Frame buffers are set at the start of the UF diff --git a/drivers/scsi/isci/unsolicited_frame_control.h b/drivers/scsi/isci/unsolicited_frame_control.h index 31cb9506f52d..75d896686f5a 100644 --- a/drivers/scsi/isci/unsolicited_frame_control.h +++ b/drivers/scsi/isci/unsolicited_frame_control.h @@ -214,7 +214,7 @@ struct sci_uf_address_table_array { * starting address of the UF address table. * 64-bit pointers are required by the hardware. */ - dma_addr_t *array; + u64 *array; /** * This field specifies the physical address location for the UF -- cgit v1.2.3 From 985af6f70dbb8a33b3af8a7c7df508d924650e37 Mon Sep 17 00:00:00 2001 From: Marcin Tomczak Date: Fri, 29 Jul 2011 17:16:50 -0700 Subject: [SCSI] isci: change sas phy timeouts from 54us to 59us Need the following workaround in the driver for interoperability with the older Intel SSD drives and any other SATA drive that may exhibit the same behavior. This is a corner case where SCU speed is limited to either 3G or 1.5G and the drive has a period of DC idle when it switches speed during SATA speed negotiation. Workaround :change PHYTOV[31:24] from 0x36 to 0x3B. Signed-off-by: Marcin Tomczak Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/phy.c | 13 +++++++++++++ drivers/scsi/isci/registers.h | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/drivers/scsi/isci/phy.c b/drivers/scsi/isci/phy.c index 79313a7a2356..430fc8ff014a 100644 --- a/drivers/scsi/isci/phy.c +++ b/drivers/scsi/isci/phy.c @@ -104,6 +104,7 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, u32 parity_count = 0; u32 llctl, link_rate; u32 clksm_value = 0; + u32 sp_timeouts = 0; iphy->link_layer_registers = reg; @@ -211,6 +212,18 @@ sci_phy_link_layer_initialization(struct isci_phy *iphy, llctl |= SCU_SAS_LLCTL_GEN_VAL(MAX_LINK_RATE, link_rate); writel(llctl, &iphy->link_layer_registers->link_layer_control); + sp_timeouts = readl(&iphy->link_layer_registers->sas_phy_timeouts); + + /* Clear the default 0x36 (54us) RATE_CHANGE timeout value. */ + sp_timeouts &= ~SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0xFF); + + /* Set RATE_CHANGE timeout value to 0x3B (59us). This ensures SCU can + * lock with 3Gb drive when SCU max rate is set to 1.5Gb. + */ + sp_timeouts |= SCU_SAS_PHYTOV_GEN_VAL(RATE_CHANGE, 0x3B); + + writel(sp_timeouts, &iphy->link_layer_registers->sas_phy_timeouts); + if (is_a2(ihost->pdev)) { /* Program the max ARB time for the PHY to 700us so we inter-operate with * the PMC expander which shuts down PHYs if the expander PHY generates too diff --git a/drivers/scsi/isci/registers.h b/drivers/scsi/isci/registers.h index 9b266c7428e8..00afc738bbed 100644 --- a/drivers/scsi/isci/registers.h +++ b/drivers/scsi/isci/registers.h @@ -1299,6 +1299,18 @@ struct scu_transport_layer_registers { #define SCU_AFE_XCVRCR_OFFSET 0x00DC #define SCU_AFE_LUTCR_OFFSET 0x00E0 +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_SHIFT (0UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_ALIGN_DETECTION_MASK (0x000000FFUL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_SHIFT (8UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_HOT_PLUG_MASK (0x0000FF00UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_SHIFT (16UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_COMSAS_DETECTION_MASK (0x00FF0000UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_SHIFT (24UL) +#define SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_RATE_CHANGE_MASK (0xFF000000UL) + +#define SCU_SAS_PHYTOV_GEN_VAL(name, value) \ + SCU_GEN_VALUE(SCU_SAS_PHY_TIMER_TIMEOUT_VALUES_##name, value) + #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_SHIFT (0) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_MASK (0x00000003) #define SCU_SAS_LINK_LAYER_CONTROL_MAX_LINK_RATE_GEN1 (0) -- cgit v1.2.3 From 4ac13e177904280a2502c27029a72e3fd2957cde Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Jul 2011 17:16:55 -0700 Subject: [SCSI] isci: Update MAINTAINERS entry for the isci driver Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- MAINTAINERS | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 069ee3b5c651..4fe6854c57d9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3262,6 +3262,17 @@ F: Documentation/input/multi-touch-protocol.txt F: drivers/input/input-mt.c K: \b(ABS|SYN)_MT_ +INTEL C600 SERIES SAS CONTROLLER DRIVER +M: Intel SCU Linux support +M: Dan Williams +M: Dave Jiang +M: Ed Nadolski +L: linux-scsi@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/djbw/isci.git +S: Maintained +F: drivers/scsi/isci/ +F: firmware/isci/ + INTEL IDLE DRIVER M: Len Brown L: linux-pm@lists.linux-foundation.org -- cgit v1.2.3 From 3a7bda830fad427768ed71c0ebf3448849c006b5 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 29 Jul 2011 17:17:00 -0700 Subject: [SCSI] isci: Adding documentation to API change and fixup sysfs registration Adding API update for adding isci_id entry scsi_host sysfs entry. Also fixing up the sysfs registration to the scsi_host template Signed-off-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- Documentation/ABI/testing/sysfs-class-scsi_host | 13 +++++++++ drivers/scsi/isci/init.c | 36 ++++++++++++------------- 2 files changed, 31 insertions(+), 18 deletions(-) create mode 100644 Documentation/ABI/testing/sysfs-class-scsi_host diff --git a/Documentation/ABI/testing/sysfs-class-scsi_host b/Documentation/ABI/testing/sysfs-class-scsi_host new file mode 100644 index 000000000000..29a4f892e433 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-scsi_host @@ -0,0 +1,13 @@ +What: /sys/class/scsi_host/hostX/isci_id +Date: June 2011 +Contact: Dave Jiang +Description: + This file contains the enumerated host ID for the Intel + SCU controller. The Intel(R) C600 Series Chipset SATA/SAS + Storage Control Unit embeds up to two 4-port controllers in + a single PCI device. The controllers are enumerated in order + which usually means the lowest number scsi_host corresponds + with the first controller, but this association is not + guaranteed. The 'isci_id' attribute unambiguously identifies + the controller index: '0' for the first controller, + '1' for the second. diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index 61e0d09e2b57..e78320bbec4f 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "isci.h" #include "task.h" #include "probe_roms.h" @@ -113,6 +114,22 @@ unsigned char max_concurr_spinup = 1; module_param(max_concurr_spinup, byte, 0); MODULE_PARM_DESC(max_concurr_spinup, "Max concurrent device spinup"); +static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); + struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); + struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); + + return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); +} + +static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); + +struct device_attribute *isci_host_attrs[] = { + &dev_attr_isci_id, + NULL +}; + static struct scsi_host_template isci_sht = { .module = THIS_MODULE, @@ -138,6 +155,7 @@ static struct scsi_host_template isci_sht = { .slave_alloc = sas_slave_alloc, .target_destroy = sas_target_destroy, .ioctl = sas_ioctl, + .shost_attrs = isci_host_attrs, }; static struct sas_domain_function_template isci_transport_ops = { @@ -232,17 +250,6 @@ static int isci_register_sas_ha(struct isci_host *isci_host) return 0; } -static ssize_t isci_show_id(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct Scsi_Host *shost = container_of(dev, typeof(*shost), shost_dev); - struct sas_ha_struct *sas_ha = SHOST_TO_SAS_HA(shost); - struct isci_host *ihost = container_of(sas_ha, typeof(*ihost), sas_ha); - - return snprintf(buf, PAGE_SIZE, "%d\n", ihost->id); -} - -static DEVICE_ATTR(isci_id, S_IRUGO, isci_show_id, NULL); - static void isci_unregister(struct isci_host *isci_host) { struct Scsi_Host *shost; @@ -251,7 +258,6 @@ static void isci_unregister(struct isci_host *isci_host) return; shost = isci_host->shost; - device_remove_file(&shost->shost_dev, &dev_attr_isci_id); sas_unregister_ha(&isci_host->sas_ha); @@ -415,14 +421,8 @@ static struct isci_host *isci_host_alloc(struct pci_dev *pdev, int id) if (err) goto err_shost_remove; - err = device_create_file(&shost->shost_dev, &dev_attr_isci_id); - if (err) - goto err_unregister_ha; - return isci_host; - err_unregister_ha: - sas_unregister_ha(&(isci_host->sas_ha)); err_shost_remove: scsi_remove_host(shost); err_shost: -- cgit v1.2.3 From 39ea2c5b5ffaa344467da53e885cfa4ac0105050 Mon Sep 17 00:00:00 2001 From: Jeff Skirvin Date: Fri, 29 Jul 2011 17:17:05 -0700 Subject: [SCSI] isci: Leave requests alone if already terminating. Instead of immediately completing any request that has a second termination call made on it, wait for the TC done/abort HW event. Signed-off-by: Jeff Skirvin Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/request.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index b4cf998385b3..b5d3a8c4d329 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -732,12 +732,20 @@ sci_io_request_terminate(struct isci_request *ireq) sci_change_state(&ireq->sm, SCI_REQ_ABORTING); return SCI_SUCCESS; case SCI_REQ_TASK_WAIT_TC_RESP: + /* The task frame was already confirmed to have been + * sent by the SCU HW. Since the state machine is + * now only waiting for the task response itself, + * abort the request and complete it immediately + * and don't wait for the task response. + */ sci_change_state(&ireq->sm, SCI_REQ_ABORTING); sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); return SCI_SUCCESS; case SCI_REQ_ABORTING: - sci_change_state(&ireq->sm, SCI_REQ_COMPLETED); - return SCI_SUCCESS; + /* If a request has a termination requested twice, return + * a failure indication, since HW confirmation of the first + * abort is still outstanding. + */ case SCI_REQ_COMPLETED: default: dev_warn(&ireq->owning_controller->pdev->dev, -- cgit v1.2.3 From 9b4be528999483d70a1ffc0accd102e477d5a503 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:10 -0700 Subject: [SCSI] isci: dynamic interrupt coalescing Hardware allows both an outstanding number commands and a timeout value (whichever occurs first) as a gate to the next interrupt generation. This scheme at completion time looks at the remaining number of outstanding tasks and sets the timeout to maximize small transaction operation. If transactions are large (take more than a few 10s of microseconds to complete) then performance is not interrupt processing bound, so the small timeouts this scheme generates are overridden by the time it takes for a completion to arrive. Tested-by: Dave Jiang Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/host.c | 10 +++++++++- drivers/scsi/isci/host.h | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 26072f1e9852..2328f98c7f1e 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -1091,6 +1091,7 @@ static void isci_host_completion_routine(unsigned long data) struct isci_request *request; struct isci_request *next_request; struct sas_task *task; + u16 active; INIT_LIST_HEAD(&completed_request_list); INIT_LIST_HEAD(&errored_request_list); @@ -1181,6 +1182,13 @@ static void isci_host_completion_routine(unsigned long data) } } + /* the coalesence timeout doubles at each encoding step, so + * update it based on the ilog2 value of the outstanding requests + */ + active = isci_tci_active(ihost); + writel(SMU_ICC_GEN_VAL(NUMBER, active) | + SMU_ICC_GEN_VAL(TIMER, ISCI_COALESCE_BASE + ilog2(active)), + &ihost->smu_registers->interrupt_coalesce_control); } /** @@ -1471,7 +1479,7 @@ static void sci_controller_ready_state_enter(struct sci_base_state_machine *sm) struct isci_host *ihost = container_of(sm, typeof(*ihost), sm); /* set the default interrupt coalescence number and timeout value. */ - sci_controller_set_interrupt_coalescence(ihost, 0x10, 250); + sci_controller_set_interrupt_coalescence(ihost, 0, 0); } static void sci_controller_ready_state_exit(struct sci_base_state_machine *sm) diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 062101a39f79..9f33831a2f04 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -369,6 +369,9 @@ static inline struct isci_host *dev_to_ihost(struct domain_device *dev) #define ISCI_TAG_SEQ(tag) (((tag) >> 12) & (SCI_MAX_SEQ-1)) #define ISCI_TAG_TCI(tag) ((tag) & (SCI_MAX_IO_REQUESTS-1)) +/* interrupt coalescing baseline: 9 == 3 to 5us interrupt delay per command */ +#define ISCI_COALESCE_BASE 9 + /* expander attached sata devices require 3 rnc slots */ static inline int sci_remote_device_node_count(struct isci_remote_device *idev) { -- cgit v1.2.3 From 77cd72a53f6426f81b7f56a862402849ee903bda Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:16 -0700 Subject: [SCSI] isci: fix event-get pointer increment Hardware only increments the put pointer on event types >= 4. Do not increment the get pointer for event type 3. Reported-by: Kapil Karkra Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/host.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/isci/host.c b/drivers/scsi/isci/host.c index 2328f98c7f1e..6981b773a88d 100644 --- a/drivers/scsi/isci/host.c +++ b/drivers/scsi/isci/host.c @@ -531,6 +531,9 @@ static void sci_controller_process_completions(struct isci_host *ihost) break; case SCU_COMPLETION_TYPE_EVENT: + sci_controller_event_completion(ihost, ent); + break; + case SCU_COMPLETION_TYPE_NOTIFY: { event_cycle ^= ((event_get+1) & SCU_MAX_EVENTS) << (SMU_COMPLETION_QUEUE_GET_EVENT_CYCLE_BIT_SHIFT - SCU_MAX_EVENTS_SHIFT); -- cgit v1.2.3 From 98e2a5a3a125608505783bdb95744997f76b3c30 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 29 Jul 2011 17:17:21 -0700 Subject: [SCSI] isci: add version number Signed-off-by: Dan Williams Signed-off-by: James Bottomley --- drivers/scsi/isci/init.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index e78320bbec4f..29aa34efb0f5 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -64,6 +64,14 @@ #include "task.h" #include "probe_roms.h" +#define MAJ 1 +#define MIN 0 +#define BUILD 0 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ + __stringify(BUILD) + +MODULE_VERSION(DRV_VERSION); + static struct scsi_transport_template *isci_transport_template; static DEFINE_PCI_DEVICE_TABLE(isci_id_table) = { @@ -540,7 +548,8 @@ static __init int isci_init(void) { int err; - pr_info("%s: Intel(R) C600 SAS Controller Driver\n", DRV_NAME); + pr_info("%s: Intel(R) C600 SAS Controller Driver - version %s\n", + DRV_NAME, DRV_VERSION); isci_transport_template = sas_domain_attach_transport(&isci_transport_ops); if (!isci_transport_template) -- cgit v1.2.3 From b4cb0d4da745bc1d806b9b4a27cc4ce1f7adbf99 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 23 Aug 2011 21:04:28 -0700 Subject: hwmon: (i5k_amb) Drop i5k_channel_pci_id Function i5k_channel_pci_id looks like it can fail, while a better code design would make it more obvious that it can't. We can even get rid of the function. Signed-off-by: Jean Delvare Acked-by: Darrick J. Wong Signed-off-by: Guenter Roeck --- drivers/hwmon/i5k_amb.c | 42 ++++++++++++++---------------------------- 1 file changed, 14 insertions(+), 28 deletions(-) diff --git a/drivers/hwmon/i5k_amb.c b/drivers/hwmon/i5k_amb.c index c4c40be0edbf..d22f241b6a67 100644 --- a/drivers/hwmon/i5k_amb.c +++ b/drivers/hwmon/i5k_amb.c @@ -114,7 +114,6 @@ struct i5k_amb_data { void __iomem *amb_mmio; struct i5k_device_attribute *attrs; unsigned int num_attrs; - unsigned long chipset_id; }; static ssize_t show_name(struct device *dev, struct device_attribute *devattr, @@ -444,8 +443,6 @@ static int __devinit i5k_find_amb_registers(struct i5k_amb_data *data, goto out; } - data->chipset_id = devid; - res = 0; out: pci_dev_put(pcidev); @@ -478,23 +475,13 @@ out: return res; } -static unsigned long i5k_channel_pci_id(struct i5k_amb_data *data, - unsigned long channel) -{ - switch (data->chipset_id) { - case PCI_DEVICE_ID_INTEL_5000_ERR: - return PCI_DEVICE_ID_INTEL_5000_FBD0 + channel; - case PCI_DEVICE_ID_INTEL_5400_ERR: - return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel; - default: - BUG(); - } -} - -static unsigned long chipset_ids[] = { - PCI_DEVICE_ID_INTEL_5000_ERR, - PCI_DEVICE_ID_INTEL_5400_ERR, - 0 +static struct { + unsigned long err; + unsigned long fbd0; +} chipset_ids[] __devinitdata = { + { PCI_DEVICE_ID_INTEL_5000_ERR, PCI_DEVICE_ID_INTEL_5000_FBD0 }, + { PCI_DEVICE_ID_INTEL_5400_ERR, PCI_DEVICE_ID_INTEL_5400_FBD0 }, + { 0, 0 } }; #ifdef MODULE @@ -510,8 +497,7 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev) { struct i5k_amb_data *data; struct resource *reso; - int i; - int res = -ENODEV; + int i, res; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -520,22 +506,22 @@ static int __devinit i5k_amb_probe(struct platform_device *pdev) /* Figure out where the AMB registers live */ i = 0; do { - res = i5k_find_amb_registers(data, chipset_ids[i]); + res = i5k_find_amb_registers(data, chipset_ids[i].err); + if (res == 0) + break; i++; - } while (res && chipset_ids[i]); + } while (chipset_ids[i].err); if (res) goto err; /* Copy the DIMM presence map for the first two channels */ - res = i5k_channel_probe(&data->amb_present[0], - i5k_channel_pci_id(data, 0)); + res = i5k_channel_probe(&data->amb_present[0], chipset_ids[i].fbd0); if (res) goto err; /* Copy the DIMM presence map for the optional second two channels */ - i5k_channel_probe(&data->amb_present[2], - i5k_channel_pci_id(data, 1)); + i5k_channel_probe(&data->amb_present[2], chipset_ids[i].fbd0 + 1); /* Set up resource regions */ reso = request_mem_region(data->amb_base, data->amb_len, DRVNAME); -- cgit v1.2.3 From e8037d49835482c9534a9a07bed0d0ea831135ae Mon Sep 17 00:00:00 2001 From: Eric Seppanen Date: Tue, 23 Aug 2011 21:25:12 +0200 Subject: block: Fix queue_flag update when rq_affinity goes from 2 to 1 Commit 5757a6d76cdf added the QUEUE_FLAG_SAME_FORCE flag, but fails to clear that flag when the current state is '2' (SAME_COMP + SAME_FORCE) and the new state is '1' (SAME_COMP). Acked-by: Dan Williams Reviewed-by: Roland Dreier Signed-off-by: Eric Seppanen Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0ee17b5e7fb6..e681805cdb47 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ret = queue_var_store(&val, page, count); spin_lock_irq(q->queue_lock); - if (val) { + if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - if (val == 2) - queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); - } else { + queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 1) { + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 0) { queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } -- cgit v1.2.3 From c2183d1e9b3f313dd8ba2b1b0197c8d9fb86a7ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 24 Aug 2011 10:20:17 +0200 Subject: fuse: check size of FUSE_NOTIFY_INVAL_ENTRY message FUSE_NOTIFY_INVAL_ENTRY didn't check the length of the write so the message processing could overrun and result in a "kernel BUG at fs/fuse/dev.c:629!" Reported-by: Han-Wen Nienhuys Signed-off-by: Miklos Szeredi CC: stable@kernel.org --- fs/fuse/dev.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 640fc229df10..168a80f7f12b 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1358,6 +1358,10 @@ static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, if (outarg.namelen > FUSE_NAME_MAX) goto err; + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + goto err; + name.name = buf; name.len = outarg.namelen; err = fuse_copy_one(cs, buf, outarg.namelen + 1); -- cgit v1.2.3 From 0ebb962e00a52b644433065d224ed89f72a84756 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Wed, 20 Jul 2011 15:43:42 +0100 Subject: ARM: 7003/1: vexpress: Add clock definition for the SP805. It seems that an entry for the SP805 watchdog in the table of clocks was missing. This results in the sp805_wdt driver rejecting the device with the following errors: sp805-wdt mb:wdt: Clock not found sp805-wdt mb:wdt: Probe Failed!!! sp805-wdt: probe of mb:wdt failed with error -2 While not obviously stated in the hardware docs, the onboard SP810's "REFCLK" is connected to a 32.768KHz crystal, and this drives the watchdog. Add a struct clk and corresponding lookup entry for it. Signed-off-by: Nick Bowler Signed-off-by: Russell King --- arch/arm/mach-vexpress/v2m.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 9e6b93b1a043..d0d267a8d3f9 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -318,6 +318,10 @@ static struct clk v2m_sp804_clk = { .rate = 1000000, }; +static struct clk v2m_ref_clk = { + .rate = 32768, +}; + static struct clk dummy_apb_pclk; static struct clk_lookup v2m_lookups[] = { @@ -348,6 +352,9 @@ static struct clk_lookup v2m_lookups[] = { }, { /* CLCD */ .dev_id = "mb:clcd", .clk = &osc1_clk, + }, { /* SP805 WDT */ + .dev_id = "mb:wdt", + .clk = &v2m_ref_clk, }, { /* SP804 timers */ .dev_id = "sp804", .con_id = "v2m-timer0", -- cgit v1.2.3 From 7675535958175b85b8117bcee245d9ecbc4d3d74 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 24 Aug 2011 10:53:10 +0200 Subject: ALSA: hda/conexant - Enable ADC-switching for auto-mic mode, too The ADC-switching can work also in the auto-mic mode, too. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 4c462c3d6462..5616444a8ed7 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3866,7 +3866,7 @@ static void cx_auto_parse_input(struct hda_codec *codec) } if (imux->num_items >= 2 && cfg->num_inputs == imux->num_items) cx_auto_check_auto_mic(codec); - if (imux->num_items > 1 && !spec->auto_mic) { + if (imux->num_items > 1) { for (i = 1; i < imux->num_items; i++) { if (spec->imux_info[i].adc != spec->imux_info[0].adc) { spec->adc_switching = 1; -- cgit v1.2.3 From 52c49e0156e167fa65bbc3dd87a3a2f651af03fb Mon Sep 17 00:00:00 2001 From: Joseph Pentland Date: Tue, 23 Aug 2011 10:41:50 +0100 Subject: ASoC: Add Springbank I/O card to Speyside Kconfig Signed-off-by: Joseph Pentland Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/samsung/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/samsung/Kconfig b/sound/soc/samsung/Kconfig index b99091fc34eb..65f980ef2870 100644 --- a/sound/soc/samsung/Kconfig +++ b/sound/soc/samsung/Kconfig @@ -185,6 +185,7 @@ config SND_SOC_SPEYSIDE select SND_SAMSUNG_I2S select SND_SOC_WM8996 select SND_SOC_WM9081 + select SND_SOC_WM1250_EV1 config SND_SOC_SPEYSIDE_WM8962 tristate "Audio support for Wolfson Speyside with WM8962" -- cgit v1.2.3 From 250b68512dd7e7d31a8c85a740a4b085bade4ba0 Mon Sep 17 00:00:00 2001 From: Sangbeom Kim Date: Tue, 23 Aug 2011 19:36:59 +0900 Subject: ASoC: Add samsung maintainer Signed-off-by: Sangbeom Kim Acked-by: Jassi Brar Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 46e3e6b99220..4f555d8e5346 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5532,6 +5532,7 @@ F: include/media/*7146* SAMSUNG AUDIO (ASoC) DRIVERS M: Jassi Brar +M: Sangbeom Kim L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported F: sound/soc/samsung -- cgit v1.2.3 From ee1a4d4b7fcfce31dade9f2ad333b34159cee799 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 23 Aug 2011 11:16:28 -0600 Subject: ASoC: Tegra: wm8903 machine driver: Drop Ventana support Board file support for Ventana is not yet mainlined, and probably won't ever be given the move to Device-Tree. Consequently, the Ventana entry is being removed from arch/arm/tools/mach-types in the next merge window, since it was registered over a year ago. This will also remove function machine_is_ventana(), which is used by the ASoC Tegra WM8903 machine driver. This will cause compilation failures. Drop Ventana support to resolve this. Hopefully, in the not-too-distant future, tegra_wm8903.c will be able to configure itself from Device-Tree, and hence we'll be able to re-instate Ventana support just by creating a .dts file for the board. Also note that Aebl support is in a similar boat. However, that board isn't scheduled for deprecation for at least another 5 months, and perhaps we will have completely removed non-Device-Tree support from tegra_wm8903.c by then and/or adjusted mach-types policy. Signed-off-by: Stephen Warren Acked-by: Liam Girdwood Signed-off-by: Mark Brown --- sound/soc/tegra/tegra_wm8903.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/tegra/tegra_wm8903.c b/sound/soc/tegra/tegra_wm8903.c index 661373c2352a..be27f1d229af 100644 --- a/sound/soc/tegra/tegra_wm8903.c +++ b/sound/soc/tegra/tegra_wm8903.c @@ -319,7 +319,7 @@ static int tegra_wm8903_init(struct snd_soc_pcm_runtime *rtd) snd_soc_dapm_force_enable_pin(dapm, "Mic Bias"); /* FIXME: Calculate automatically based on DAPM routes? */ - if (!machine_is_harmony() && !machine_is_ventana()) + if (!machine_is_harmony()) snd_soc_dapm_nc_pin(dapm, "IN1L"); if (!machine_is_seaboard() && !machine_is_aebl()) snd_soc_dapm_nc_pin(dapm, "IN1R"); @@ -395,7 +395,7 @@ static __devinit int tegra_wm8903_driver_probe(struct platform_device *pdev) platform_set_drvdata(pdev, card); snd_soc_card_set_drvdata(card, machine); - if (machine_is_harmony() || machine_is_ventana()) { + if (machine_is_harmony()) { card->dapm_routes = harmony_audio_map; card->num_dapm_routes = ARRAY_SIZE(harmony_audio_map); } else if (machine_is_seaboard()) { -- cgit v1.2.3 From c0764b2a4cdc41779460eb8796bc76e4fbddf339 Mon Sep 17 00:00:00 2001 From: Jean-Christophe PLAGNIOL-VILLARD Date: Tue, 23 Aug 2011 16:35:31 +0200 Subject: at91: at91sam9261.c: fix typo in t2_clk alias for atmel_tcb.0 This was a typo in clockdev declaration for at91sam9261 SoC. Fix the kernel hanging when switching clocksource to TC (tcb_clksrc). Signed-off-by: Jean-Christophe PLAGNIOL-VILLARD Acked-by: Remy Bohmer Acked-by: Nicolas Ferre --- arch/arm/mach-at91/at91sam9261.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-at91/at91sam9261.c b/arch/arm/mach-at91/at91sam9261.c index d522b47e30b5..6c8e3b5f669f 100644 --- a/arch/arm/mach-at91/at91sam9261.c +++ b/arch/arm/mach-at91/at91sam9261.c @@ -157,7 +157,7 @@ static struct clk_lookup periph_clocks_lookups[] = { CLKDEV_CON_DEV_ID("spi_clk", "atmel_spi.1", &spi1_clk), CLKDEV_CON_DEV_ID("t0_clk", "atmel_tcb.0", &tc0_clk), CLKDEV_CON_DEV_ID("t1_clk", "atmel_tcb.0", &tc1_clk), - CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc1_clk), + CLKDEV_CON_DEV_ID("t2_clk", "atmel_tcb.0", &tc2_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.0", &ssc0_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.1", &ssc1_clk), CLKDEV_CON_DEV_ID("pclk", "ssc.2", &ssc2_clk), -- cgit v1.2.3 From a63271627521b825b0dd0a564e9a9c62b4c1ca89 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:32 +0200 Subject: block: change force plug flush call order Do blk_flush_plug_list() first and then add new request aDo blk_flush_plug_list() first and then add new request aDo blk_flush_plug_list() first and then add new request at the tail. New request can't be merged to existing requests, but later new requests might be merged with this new one. If blk_flush_plug_list() is done later, the merge doesn't happen. Believe it or not, this fixes a 10% regression running sysbench workload. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 90e1ffdeb415..67dba6941194 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1302,11 +1302,11 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - list_add_tail(&req->queuelist, &plug->list); - plug->count++; - drive_stat_acct(req, 1); if (plug->count >= BLK_MAX_REQUEST_COUNT) blk_flush_plug_list(plug, false); + plug->count++; + list_add_tail(&req->queuelist, &plug->list); + drive_stat_acct(req, 1); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); -- cgit v1.2.3 From 56ebdaf2fa3c5276be201c5d1aff1490b682ecf2 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 24 Aug 2011 16:04:34 +0200 Subject: block: simplify force plug flush code a little bit Cleaning up the code a little bit. attempt_plug_merge() traverses the plug list anyway, we can do the request counting there, so stack size is reduced a little bit. The motivation here is I suspect if we should count the requests for each queue (task could handle multiple disks in the meantime), but my test doesn't show it's worthy doing. If somebody proves we should do it, below change will make that more easier. Signed-off-by: Shaohua Li Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++------ include/linux/blkdev.h | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 67dba6941194..b2ed78afd9f0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1167,7 +1167,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, * true if merge was successful, otherwise false. */ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, - struct bio *bio) + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; @@ -1176,10 +1176,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, plug = tsk->plug; if (!plug) goto out; + *request_count = 0; list_for_each_entry_reverse(rq, &plug->list, queuelist) { int el_ret; + (*request_count)++; + if (rq->q != q) continue; @@ -1219,6 +1222,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) struct blk_plug *plug; int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; struct request *req; + unsigned int request_count = 0; /* * low level driver can indicate that it wants pages above a @@ -1237,7 +1241,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(current, q, bio)) + if (attempt_plug_merge(current, q, bio, &request_count)) goto out; spin_lock_irq(q->queue_lock); @@ -1302,9 +1306,8 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } - if (plug->count >= BLK_MAX_REQUEST_COUNT) + if (request_count >= BLK_MAX_REQUEST_COUNT) blk_flush_plug_list(plug, false); - plug->count++; list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { @@ -2634,7 +2637,6 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; - plug->count = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2718,7 +2720,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; list_splice_init(&plug->list, &list); - plug->count = 0; if (plug->should_sort) { list_sort(NULL, &list, plug_rq_cmp); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84b15d54f8c2..7fbaa9103344 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -873,7 +873,6 @@ struct blk_plug { struct list_head list; struct list_head cb_list; unsigned int should_sort; - unsigned int count; }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3 From 27e7318c3e47e4fac71fcb472623434063ccc7a5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Aug 2011 17:15:09 +0200 Subject: [S390] nss,initrd: kernel image and initrd must be in different segments When IPL'ing from a block device and an NSS should be created we must make sure that the kernel image and the initrd are in different 1MB segments. Otherwise creating the NSS will fail. So we make sure the initrd is 4MB behind the end of the kernel image like we do already when IPL via the VM reader is performed. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/early.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 068f8465c4ee..f297456dba7a 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -396,17 +396,19 @@ static __init void detect_machine_facilities(void) static __init void rescue_initrd(void) { #ifdef CONFIG_BLK_DEV_INITRD + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); /* - * Move the initrd right behind the bss section in case it starts - * within the bss section. So we don't overwrite it when the bss - * section gets cleared. + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. */ if (!INITRD_START || !INITRD_SIZE) return; - if (INITRD_START >= (unsigned long) __bss_stop) + if (INITRD_START >= min_initrd_addr) return; - memmove(__bss_stop, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = (unsigned long) __bss_stop; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; #endif } -- cgit v1.2.3 From ba465d830ed1703713251917f154688ec537580f Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Aug 2011 17:15:10 +0200 Subject: [S390] drivers/s390/block/dasd_ioctl.c: add missing kfree Data is only used to temporarily hold information to be copied to the user level, so it should be freed before leaving the function. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @exists@ local idexpression x; statement S,S1; expression E; identifier fl; expression *ptr != NULL; @@ x = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...kfree(x)...+> } when any when != true x == NULL x->fl ...> ( if (x == NULL) S1 | if (...) { ... when != x when forall ( return \(0\|<+...x...+>\|ptr\); | * return ...; ) } ) // Signed-off-by: Julia Lawall Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_ioctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index eb4e034378cd..f1a2016829fc 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -249,6 +249,7 @@ static int dasd_ioctl_reset_profile(struct dasd_block *block) static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) { struct dasd_profile_info_t *data; + int rc = 0; data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) @@ -279,11 +280,14 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) spin_unlock_bh(&block->profile.lock); } else { spin_unlock_bh(&block->profile.lock); - return -EIO; + rc = -EIO; + goto out; } if (copy_to_user(argp, data, sizeof(*data))) - return -EFAULT; - return 0; + rc = -EFAULT; +out: + kfree(data); + return rc; } #else static int dasd_ioctl_reset_profile(struct dasd_block *block) -- cgit v1.2.3 From 798620fb1dd510d163f1c875c8422dc605f446da Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 24 Aug 2011 17:15:11 +0200 Subject: [S390] arch/s390/kernel/ipl.c: correct error detection check reipl_fcp_kset was just initialized, so it appears that it should be tested instead of reipl_kset. Signed-off-by: Julia Lawall Reported-by: Suman Saha Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 04361d5a4279..ee28e064ac3d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1220,7 +1220,7 @@ static int __init reipl_fcp_init(void) /* sysfs: create fcp kset for mixing attr group and bin attrs */ reipl_fcp_kset = kset_create_and_add(IPL_FCP_STR, NULL, &reipl_kset->kobj); - if (!reipl_kset) { + if (!reipl_fcp_kset) { free_page((unsigned long) reipl_block_fcp); return -ENOMEM; } -- cgit v1.2.3 From e1202edadbf846f0a4de70c8c0b9fe5a6c88b1cb Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Wed, 24 Aug 2011 17:15:12 +0200 Subject: [S390] Change default action from reipl to stop for on_restart The main purpose for PSW restart will be kdump. Therefore customers will issue "system restart" for creating a dump. If kdump is not enabled, currently "PSW restart" will reboot the system and then no dump can be created any more. In order to still allow a manual stand-alone dump in the case a user issues "PSW restart" on a system that has not enabled kdump we now stop the system. Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ipl.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index ee28e064ac3d..48c710206366 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1618,7 +1618,8 @@ static struct shutdown_action vmcmd_action = {SHUTDOWN_ACTION_VMCMD_STR, static void stop_run(struct shutdown_trigger *trigger) { - if (strcmp(trigger->name, ON_PANIC_STR) == 0) + if (strcmp(trigger->name, ON_PANIC_STR) == 0 || + strcmp(trigger->name, ON_RESTART_STR) == 0) disabled_wait((unsigned long) __builtin_return_address(0)); while (sigp(smp_processor_id(), sigp_stop) == sigp_busy) cpu_relax(); @@ -1717,7 +1718,7 @@ static void do_panic(void) /* on restart */ static struct shutdown_trigger on_restart_trigger = {ON_RESTART_STR, - &reipl_action}; + &stop_action}; static ssize_t on_restart_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) -- cgit v1.2.3 From 8adb4ca344b48bbbf87ca66fd07a2dd503619714 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Aug 2011 17:15:13 +0200 Subject: [S390] memory hotplug: only unassign assigned increments Make sure that only assigned storage increments are unassigned when attaching a storage element. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/char/sclp_cmd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index be55fb2b1b1c..837e010299a8 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -383,8 +383,10 @@ static int sclp_attach_storage(u8 id) switch (sccb->header.response_code) { case 0x0020: set_bit(id, sclp_storage_ids); - for (i = 0; i < sccb->assigned; i++) - sclp_unassign_storage(sccb->entries[i] >> 16); + for (i = 0; i < sccb->assigned; i++) { + if (sccb->entries[i]) + sclp_unassign_storage(sccb->entries[i] >> 16); + } break; default: rc = -EIO; -- cgit v1.2.3 From 18036b5866b5e407a28f444a80de186a5d7df767 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 24 Aug 2011 16:35:32 +0100 Subject: ASoC: Correct element count for WM8996 sidetone HPF I can count. Honest. Signed-off-by: Mark Brown Acked-by: Liam Girdwood --- sound/soc/codecs/wm8996.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm8996.c b/sound/soc/codecs/wm8996.c index 0936ae5e3749..0cdb9d105671 100644 --- a/sound/soc/codecs/wm8996.c +++ b/sound/soc/codecs/wm8996.c @@ -420,7 +420,7 @@ static const char *sidetone_hpf_text[] = { }; static const struct soc_enum sidetone_hpf = - SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 6, sidetone_hpf_text); + SOC_ENUM_SINGLE(WM8996_SIDETONE, 7, 7, sidetone_hpf_text); static const char *hpf_mode_text[] = { "HiFi", "Custom", "Voice" -- cgit v1.2.3 From f1c39625d63c9f8eba8f036429c10a9cb9e32920 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 24 Aug 2011 09:54:24 -0700 Subject: xen: use non-tracing preempt in xen_clocksource_read() The tracing code used sched_clock() to get tracing timestamps, which ends up calling xen_clocksource_read(). xen_clocksource_read() must disable preemption, but if preemption tracing is enabled, this results in infinite recursion. I've only noticed this when boot-time tracing tests are enabled, but it seems like a generic bug. It looks like it would also affect kvm_clocksource_read(). Reported-by: Konrad Rzeszutek Wilk Signed-off-by: Jeremy Fitzhardinge Cc: Avi Kivity Cc: Marcelo Tosatti --- arch/x86/xen/time.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 5158c505bef9..163b4679556e 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -168,9 +168,10 @@ cycle_t xen_clocksource_read(void) struct pvclock_vcpu_time_info *src; cycle_t ret; - src = &get_cpu_var(xen_vcpu)->time; + preempt_disable_notrace(); + src = &__get_cpu_var(xen_vcpu)->time; ret = pvclock_clocksource_read(src); - put_cpu_var(xen_vcpu); + preempt_enable_notrace(); return ret; } -- cgit v1.2.3 From 66cb54bd24086b2d871a03035de9b0e79b2b725e Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov Date: Wed, 24 Aug 2011 00:44:32 +0400 Subject: carl9170: Fix mismatch in carl9170_op_set_key mutex lock-unlock If is_main_vif(ar, vif) reports that we have to fall back to software encryption, we goto err_softw; before locking ar->mutex. As a result, we have unprotected call to carl9170_set_operating_mode and unmatched mutex_unlock. The patch fix the issue by adding mutex_lock before goto. Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov Cc: Acked-By: Christian Lamparter Signed-off-by: John W. Linville --- drivers/net/wireless/ath/carl9170/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index 0122930b14c7..0474e6638d21 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1066,8 +1066,10 @@ static int carl9170_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * the high througput speed in 802.11n networks. */ - if (!is_main_vif(ar, vif)) + if (!is_main_vif(ar, vif)) { + mutex_lock(&ar->mutex); goto err_softw; + } /* * While the hardware supports *catch-all* key, for offloading -- cgit v1.2.3 From 8b2a3827bb12430d932cd479b22d906baf08c212 Mon Sep 17 00:00:00 2001 From: Mohammed Shafi Shajakhan Date: Wed, 24 Aug 2011 21:38:07 +0530 Subject: ath9k: Fix PS wrappers in ath9k_set_coverage_class this callback is called during suspend/resume and also via iw command. it configures parameters like sifs, slottime, acktimeout in ath9k_hw_init_global_settings where few REG_READ, REG_RMW are also done and hence the need for PS wrappers Cc: stable@kernel.org Signed-off-by: Mohammed Shafi Shajakhan Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 9098aaad97a9..6530694a59ae 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -2283,7 +2283,11 @@ static void ath9k_set_coverage_class(struct ieee80211_hw *hw, u8 coverage_class) mutex_lock(&sc->mutex); ah->coverage_class = coverage_class; + + ath9k_ps_wakeup(sc); ath9k_hw_init_global_settings(ah); + ath9k_ps_restore(sc); + mutex_unlock(&sc->mutex); } -- cgit v1.2.3 From b569ad34926defcff998f214afeb260331165985 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:35 -0400 Subject: NFSv4: nfs4_proc_async_renew should use a GFP_NOFS allocation We shouldn't allow the renew daemon to do direct reclaim on the NFS partition. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8c77039e7a81..776b41a16469 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3397,7 +3397,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; - data = kmalloc(sizeof(*data), GFP_KERNEL); + data = kmalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; data->client = clp; -- cgit v1.2.3 From 8534d4ec055d854be6c94e8e5654fa87678ea5f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: nfs4_proc_renew should be declared static Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 2 -- fs/nfs/nfs4proc.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 1ec1a85fa71c..58adfb6e3657 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -237,8 +237,6 @@ extern const struct inode_operations nfs4_dir_inode_operations; extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred); -extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); -extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 776b41a16469..b358ec1d1711 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3386,7 +3386,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -3406,7 +3406,7 @@ int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) &nfs4_renew_ops, data); } -int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_renew(struct nfs_client *clp, struct rpc_cred *cred) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], -- cgit v1.2.3 From 2f60ea6b8ceda61ae08bef71a652eac36ec193b3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: The NFSv4.0 client must send RENEW calls if it holds a delegation RFC3530 states that if the client holds a delegation, then it is obliged to continue to send RENEW calls once every lease period in order to allow the server to return NFS4ERR_CB_PATH_DOWN if the callback path is unreachable. This is not required for NFSv4.1, since the server can at any time set the SEQ4_STATUS_CB_PATH_DOWN_SESSION in any SEQUENCE operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 5 ++++- fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4renewd.c | 12 +++++++++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 58adfb6e3657..e1b660728675 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -56,6 +56,9 @@ enum nfs4_session_state { NFS4_SESSION_DRAINING, }; +#define NFS4_RENEW_TIMEOUT 0x01 +#define NFS4_RENEW_DELEGATION_CB 0x02 + struct nfs4_minor_version_ops { u32 minor_version; @@ -225,7 +228,7 @@ struct nfs4_state_recovery_ops { }; struct nfs4_state_maintenance_ops { - int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *); + int (*sched_state_renewal)(struct nfs_client *, struct rpc_cred *, unsigned); struct rpc_cred * (*get_state_renewal_cred_locked)(struct nfs_client *); int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b358ec1d1711..e89940a2819d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3386,7 +3386,7 @@ static const struct rpc_call_ops nfs4_renew_ops = { .rpc_release = nfs4_renew_release, }; -static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], @@ -3395,6 +3395,8 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred) }; struct nfs4_renewdata *data; + if (renew_flags == 0) + return 0; if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; data = kmalloc(sizeof(*data), GFP_NOFS); @@ -5504,11 +5506,13 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ return rpc_run_task(&task_setup_data); } -static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred) +static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cred, unsigned renew_flags) { struct rpc_task *task; int ret = 0; + if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0) + return 0; task = _nfs41_proc_sequence(clp, cred); if (IS_ERR(task)) ret = PTR_ERR(task); diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c index df8e7f3ca56d..dc484c0eae7f 100644 --- a/fs/nfs/nfs4renewd.c +++ b/fs/nfs/nfs4renewd.c @@ -60,6 +60,7 @@ nfs4_renew_state(struct work_struct *work) struct rpc_cred *cred; long lease; unsigned long last, now; + unsigned renew_flags = 0; ops = clp->cl_mvops->state_renewal_ops; dprintk("%s: start\n", __func__); @@ -72,18 +73,23 @@ nfs4_renew_state(struct work_struct *work) last = clp->cl_last_renewal; now = jiffies; /* Are we close to a lease timeout? */ - if (time_after(now, last + lease/3)) { + if (time_after(now, last + lease/3)) + renew_flags |= NFS4_RENEW_TIMEOUT; + if (nfs_delegations_present(clp)) + renew_flags |= NFS4_RENEW_DELEGATION_CB; + + if (renew_flags != 0) { cred = ops->get_state_renewal_cred_locked(clp); spin_unlock(&clp->cl_lock); if (cred == NULL) { - if (!nfs_delegations_present(clp)) { + if (!(renew_flags & NFS4_RENEW_DELEGATION_CB)) { set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); goto out; } nfs_expire_all_delegations(clp); } else { /* Queue an asynchronous RENEW. */ - ops->sched_state_renewal(clp, cred); + ops->sched_state_renewal(clp, cred, renew_flags); put_rpccred(cred); goto out_exp; } -- cgit v1.2.3 From 042b60beb410caf68f576d63d6849d0f0a545eb0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 24 Aug 2011 15:07:37 -0400 Subject: NFSv4: renewd needs to be able to handle the NFS4ERR_CB_PATH_DOWN error The NFSv4 spec does not specify that the server must repeat that error, so in order to avoid having the delegations revoked, we should handle it immediately. Also note that NFS4ERR_CB_PATH_DOWN does in fact renew the lease... Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 8 ++++++-- fs/nfs/nfs4state.c | 6 ++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1b660728675..3e93e9a1bee1 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -350,6 +350,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); extern void nfs4_schedule_lease_recovery(struct nfs_client *); extern void nfs4_schedule_state_manager(struct nfs_client *); +extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e89940a2819d..4700fae1ada0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3374,9 +3374,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { /* Unless we're shutting down, schedule state recovery! */ - if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) + if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) + return; + if (task->tk_status != NFS4ERR_CB_PATH_DOWN) { nfs4_schedule_lease_recovery(clp); - return; + return; + } + nfs4_schedule_path_down_recovery(clp); } do_renew_lease(clp, timestamp); } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 72ab97ef3d61..39914be40b03 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1038,6 +1038,12 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) nfs4_schedule_state_manager(clp); } +void nfs4_schedule_path_down_recovery(struct nfs_client *clp) +{ + nfs_handle_cb_pathdown(clp); + nfs4_schedule_state_manager(clp); +} + static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) { -- cgit v1.2.3 From 3bdf28feafc52864bd7f17b39deec64833a89d19 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 23 Aug 2011 16:48:26 -0500 Subject: ASoC: MPC5200: replace of_device with platform_device 'struct of_device' no longer exists, and its functionality has been merged into platform_device. Update the MPC5200 audio DMA driver (mpc5200_dma) accordingly. This fixes a build break. Signed-off-by: Timur Tabi Acked-by: Liam Girdwood Signed-off-by: Mark Brown Cc: stable@kernel.org --- sound/soc/fsl/mpc5200_dma.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sound/soc/fsl/mpc5200_dma.c b/sound/soc/fsl/mpc5200_dma.c index fd0dc46afc34..5c6c2457386e 100644 --- a/sound/soc/fsl/mpc5200_dma.c +++ b/sound/soc/fsl/mpc5200_dma.c @@ -369,7 +369,7 @@ static struct snd_soc_platform_driver mpc5200_audio_dma_platform = { .pcm_free = &psc_dma_free, }; -static int mpc5200_hpcd_probe(struct of_device *op) +static int mpc5200_hpcd_probe(struct platform_device *op) { phys_addr_t fifo; struct psc_dma *psc_dma; @@ -487,7 +487,7 @@ out_unmap: return ret; } -static int mpc5200_hpcd_remove(struct of_device *op) +static int mpc5200_hpcd_remove(struct platform_device *op) { struct psc_dma *psc_dma = dev_get_drvdata(&op->dev); @@ -519,7 +519,7 @@ MODULE_DEVICE_TABLE(of, mpc5200_hpcd_match); static struct platform_driver mpc5200_hpcd_of_driver = { .probe = mpc5200_hpcd_probe, .remove = mpc5200_hpcd_remove, - .dev = { + .driver = { .owner = THIS_MODULE, .name = "mpc5200-pcm-audio", .of_match_table = mpc5200_hpcd_match, -- cgit v1.2.3 From b7ab83edba2d50583bc9520431618489379718b2 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Aug 2011 21:40:56 +0200 Subject: PM: Use spinlock instead of mutex in clock management functions The lock member of struct pm_clk_data is of type struct mutex, which is a problem, because the suspend and resume routines defined in drivers/base/power/clock_ops.c cannot be executed with interrupts disabled for this reason. Modify struct pm_clk_data so that its lock member is a spinlock. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- drivers/base/power/clock_ops.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c index a846b2f95cfb..2c18d584066d 100644 --- a/drivers/base/power/clock_ops.c +++ b/drivers/base/power/clock_ops.c @@ -19,7 +19,7 @@ struct pm_clk_data { struct list_head clock_list; - struct mutex lock; + spinlock_t lock; }; enum pce_status { @@ -73,9 +73,9 @@ int pm_clk_add(struct device *dev, const char *con_id) } } - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_add_tail(&ce->node, &pcd->clock_list); - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); return 0; } @@ -83,8 +83,8 @@ int pm_clk_add(struct device *dev, const char *con_id) * __pm_clk_remove - Destroy PM clock entry. * @ce: PM clock entry to destroy. * - * This routine must be called under the mutex protecting the PM list of clocks - * corresponding the the @ce's device. + * This routine must be called under the spinlock protecting the PM list of + * clocks corresponding the the @ce's device. */ static void __pm_clk_remove(struct pm_clock_entry *ce) { @@ -123,7 +123,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) if (!pcd) return; - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_for_each_entry(ce, &pcd->clock_list, node) { if (!con_id && !ce->con_id) { @@ -137,7 +137,7 @@ void pm_clk_remove(struct device *dev, const char *con_id) } } - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); } /** @@ -158,7 +158,7 @@ int pm_clk_init(struct device *dev) } INIT_LIST_HEAD(&pcd->clock_list); - mutex_init(&pcd->lock); + spin_lock_init(&pcd->lock); dev->power.subsys_data = pcd; return 0; } @@ -181,12 +181,12 @@ void pm_clk_destroy(struct device *dev) dev->power.subsys_data = NULL; - mutex_lock(&pcd->lock); + spin_lock_irq(&pcd->lock); list_for_each_entry_safe_reverse(ce, c, &pcd->clock_list, node) __pm_clk_remove(ce); - mutex_unlock(&pcd->lock); + spin_unlock_irq(&pcd->lock); kfree(pcd); } @@ -220,13 +220,14 @@ int pm_clk_suspend(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); if (!pcd) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry_reverse(ce, &pcd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) @@ -238,7 +239,7 @@ int pm_clk_suspend(struct device *dev) } } - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -251,13 +252,14 @@ int pm_clk_resume(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); if (!pcd) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry(ce, &pcd->clock_list, node) { if (ce->status == PCE_STATUS_NONE) @@ -269,7 +271,7 @@ int pm_clk_resume(struct device *dev) } } - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -344,6 +346,7 @@ int pm_clk_suspend(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); @@ -351,12 +354,12 @@ int pm_clk_suspend(struct device *dev) if (!pcd || !dev->driver) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry_reverse(ce, &pcd->clock_list, node) clk_disable(ce->clk); - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } @@ -369,6 +372,7 @@ int pm_clk_resume(struct device *dev) { struct pm_clk_data *pcd = __to_pcd(dev); struct pm_clock_entry *ce; + unsigned long flags; dev_dbg(dev, "%s()\n", __func__); @@ -376,12 +380,12 @@ int pm_clk_resume(struct device *dev) if (!pcd || !dev->driver) return 0; - mutex_lock(&pcd->lock); + spin_lock_irqsave(&pcd->lock, flags); list_for_each_entry(ce, &pcd->clock_list, node) clk_enable(ce->clk); - mutex_unlock(&pcd->lock); + spin_unlock_irqrestore(&pcd->lock, flags); return 0; } -- cgit v1.2.3 From 5a50a01bf00c8191073fdf518e1af1e950ac3af5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Aug 2011 21:41:08 +0200 Subject: sh-sci / PM: Use power.irq_safe Since sci_port_enable() and sci_port_disable() may be run with interrupts off and they execute pm_runtime_get_sync() and pm_runtime_put_sync(), respectively, the SCI device's power.irq_safe flag has to be set to indicate that it is safe to execute runtime PM callbacks for this device with interrupts off. Signed-off-by: Rafael J. Wysocki Acked-by: Magnus Damm --- drivers/tty/serial/sh-sci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 2ec57b2fb278..a9414facda47 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -1913,6 +1913,7 @@ static int __devinit sci_init_single(struct platform_device *dev, port->dev = &dev->dev; + pm_runtime_irq_safe(&dev->dev); pm_runtime_enable(&dev->dev); } -- cgit v1.2.3 From 5c3f96b20954fd6932bcfb1a860fa1d8b5b22ab0 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 24 Aug 2011 22:38:43 +0200 Subject: ARM: mach-shmobile: sh7372 LCDC1 suspend fix Associate the HDMI clock together with LCDC1 on sh7372. Without this patch Suspend-to-RAM hangs on the boards AP4EVB and Mackerel. The code hangs in the LCDC driver where the software is waiting forever for the hardware to power down. By explicitly associating the HDMI clock with LCDC1 we can make sure the HDMI clock is enabled using Runtime PM whenever the driver is accessing the hardware. This HDMI and LCDC1 dependency is documented in the sh7372 data sheet. Older kernels did work as expected but the recently merged (3.1-rc) 794d78f drivers: sh: late disabling of clocks V2 introduced code to turn off clocks lacking software reference which happens to include the HDMI clock that is needed by LCDC1 to operate as expected. Signed-off-by: Magnus Damm Signed-off-by: Rafael J. Wysocki --- arch/arm/mach-shmobile/board-ap4evb.c | 1 + arch/arm/mach-shmobile/board-mackerel.c | 1 + arch/arm/mach-shmobile/clock-sh7372.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/arch/arm/mach-shmobile/board-ap4evb.c b/arch/arm/mach-shmobile/board-ap4evb.c index 9e0856b2f9e9..fadbe5b3005d 100644 --- a/arch/arm/mach-shmobile/board-ap4evb.c +++ b/arch/arm/mach-shmobile/board-ap4evb.c @@ -1412,6 +1412,7 @@ static void __init ap4evb_init(void) fsi_init_pm_clock(); sh7372_pm_init(); pm_clk_add(&fsi_device.dev, "spu2"); + pm_clk_add(&hdmi_lcdc_device.dev, "hdmi"); } static void __init ap4evb_timer_init(void) diff --git a/arch/arm/mach-shmobile/board-mackerel.c b/arch/arm/mach-shmobile/board-mackerel.c index d41c01f83f15..0ea71f8d4b89 100644 --- a/arch/arm/mach-shmobile/board-mackerel.c +++ b/arch/arm/mach-shmobile/board-mackerel.c @@ -1588,6 +1588,7 @@ static void __init mackerel_init(void) hdmi_init_pm_clock(); sh7372_pm_init(); pm_clk_add(&fsi_device.dev, "spu2"); + pm_clk_add(&hdmi_lcdc_device.dev, "hdmi"); } static void __init mackerel_timer_init(void) diff --git a/arch/arm/mach-shmobile/clock-sh7372.c b/arch/arm/mach-shmobile/clock-sh7372.c index 6b1619a65dba..e6e11e4e2d43 100644 --- a/arch/arm/mach-shmobile/clock-sh7372.c +++ b/arch/arm/mach-shmobile/clock-sh7372.c @@ -655,6 +655,8 @@ static struct clk_lookup lookups[] = { CLKDEV_DEV_ID("renesas_usbhs.1", &mstp_clks[MSTP406]), /* USB1 */ CLKDEV_DEV_ID("sh_keysc.0", &mstp_clks[MSTP403]), /* KEYSC */ + CLKDEV_ICK_ID("hdmi", "sh_mobile_lcdc_fb.1", + &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("ick", "sh-mobile-hdmi", &div6_reparent_clks[DIV6_HDMI]), CLKDEV_ICK_ID("icka", "sh_fsi2", &div6_reparent_clks[DIV6_FSIA]), CLKDEV_ICK_ID("ickb", "sh_fsi2", &div6_reparent_clks[DIV6_FSIB]), -- cgit v1.2.3 From 1b965f1891eac2d8583b5248ef0bcbc91c201e27 Mon Sep 17 00:00:00 2001 From: Omar Ramirez Luna Date: Wed, 24 Aug 2011 15:07:04 -0500 Subject: staging: tidspbridge: fix compilation on dsp clock functions Seen on v3.1-rc3, patch: omap: mcbsp: Drop in-driver transfer support bafe2721a0fbd1cc1af04384133684f660f3658e Removed code that now cause tidspbridge to break while compiling. Signed-off-by: Omar Ramirez Luna Signed-off-by: Greg Kroah-Hartman --- drivers/staging/tidspbridge/core/dsp-clock.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/staging/tidspbridge/core/dsp-clock.c b/drivers/staging/tidspbridge/core/dsp-clock.c index 589a0554332e..3d1279c424a8 100644 --- a/drivers/staging/tidspbridge/core/dsp-clock.c +++ b/drivers/staging/tidspbridge/core/dsp-clock.c @@ -209,7 +209,6 @@ int dsp_clk_enable(enum dsp_clk_id clk_id) break; #ifdef CONFIG_OMAP_MCBSP case MCBSP_CLK: - omap_mcbsp_set_io_type(MCBSP_ID(clk_id), OMAP_MCBSP_POLL_IO); omap_mcbsp_request(MCBSP_ID(clk_id)); omap2_mcbsp_set_clks_src(MCBSP_ID(clk_id), MCBSP_CLKS_PAD_SRC); break; -- cgit v1.2.3 From c8d47631a48f254d062db8084776d1fb24785e7b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 9 Aug 2011 20:17:29 +0200 Subject: i2c-nomadik: fix kerneldoc warning There was a missing struct item in the kerneldoc, add it and fix another pretty-printing formatting issue with a missing space. Signed-off-by: Linus Walleij Signed-off-by: Ben Dooks --- drivers/i2c/busses/i2c-nomadik.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-nomadik.c b/drivers/i2c/busses/i2c-nomadik.c index f9b8854fe0a5..b228e09c5d05 100644 --- a/drivers/i2c/busses/i2c-nomadik.c +++ b/drivers/i2c/busses/i2c-nomadik.c @@ -146,6 +146,7 @@ struct i2c_nmk_client { * @stop: stop condition * @xfer_complete: acknowledge completion for a I2C message * @result: controller propogated result + * @regulator: pointer to i2c regulator * @busy: Busy doing transfer */ struct nmk_i2c_dev { @@ -509,7 +510,7 @@ static int write_i2c(struct nmk_i2c_dev *dev) if (timeout < 0) { dev_err(&dev->pdev->dev, - "wait_for_completion_timeout" + "wait_for_completion_timeout " "returned %d waiting for event\n", timeout); status = timeout; } -- cgit v1.2.3 From caca9510ff4e5d842c0589110243d60927836222 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Aug 2011 15:55:30 -0700 Subject: firmware loader: allow builtin firmware load even if usermodehelper is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit a144c6a6c924 ("PM: Print a warning if firmware is requested when tasks are frozen") we not only printed a warning if somebody tried to load the firmware when tasks are frozen - we also failed the load. But that check was done before the check for built-in firmware, and then when we disallowed usermode helpers during bootup (commit 288d5abec831: "Boot up with usermodehelper disabled"), that actually means that built-in modules can no longer load their firmware even if the firmware is built in too. Which used to work, and some people depended on it for the R100 driver. So move the test for usermodehelper_is_disabled() down, to after checking the built-in firmware. This should fix: https://bugzilla.kernel.org/show_bug.cgi?id=40952 Reported-by: James Cloos Bisected-by: Elimar Riesebieter Cc: Michel Dänzer Cc: Rafael Wysocki Cc: Greg Kroah-Hartman Cc: Valdis Kletnieks Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index bbb03e6f7255..06ed6b4e7df5 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -521,11 +521,6 @@ static int _request_firmware(const struct firmware **firmware_p, if (!firmware_p) return -EINVAL; - if (WARN_ON(usermodehelper_is_disabled())) { - dev_err(device, "firmware: %s will not be loaded\n", name); - return -EBUSY; - } - *firmware_p = firmware = kzalloc(sizeof(*firmware), GFP_KERNEL); if (!firmware) { dev_err(device, "%s: kmalloc(struct firmware) failed\n", @@ -539,6 +534,12 @@ static int _request_firmware(const struct firmware **firmware_p, return 0; } + if (WARN_ON(usermodehelper_is_disabled())) { + dev_err(device, "firmware: %s will not be loaded\n", name); + retval = -EBUSY; + goto out; + } + if (uevent) dev_dbg(device, "firmware: requesting %s\n", name); -- cgit v1.2.3 From 814fd609fa98f3667974d8c27c4d75ef4ce041ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolas=20de=20Peslo=C3=BCan?= Date: Tue, 23 Aug 2011 23:31:42 +0000 Subject: MAINTAINERS: Update GIT trees for network development MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove -2.6 from net and net-next tree names. Signed-off-by: Nicolas de Pesloüan Signed-off-by: David S. Miller --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 069ee3b5c651..c94a898caca9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4450,8 +4450,8 @@ M: "David S. Miller" L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net W: http://patchwork.ozlabs.org/project/netdev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git -T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git S: Maintained F: net/ F: include/net/ -- cgit v1.2.3 From e05c4ad3ed874ee4f5e2c969e55d318ec654332c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:37 +0000 Subject: mcast: Fix source address selection for multicast listener report Should check use count of include mode filter instead of total number of include mode filters. Signed-off-by: Zheng Yan Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 2 +- net/ipv6/mcast.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 283c0a26e03f..d577199eabd5 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -767,7 +767,7 @@ static int igmp_xmarksources(struct ip_mc_list *pmc, int nsrcs, __be32 *srcs) break; for (i=0; isfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 3e6ebcdb4779..ee7839f4d6e3 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1059,7 +1059,7 @@ static int mld_xmarksources(struct ifmcaddr6 *pmc, int nsrcs, break; for (i=0; imca_sfcount[MCAST_INCLUDE] || + if (psf->sf_count[MCAST_INCLUDE] || pmc->mca_sfcount[MCAST_EXCLUDE] != psf->sf_count[MCAST_EXCLUDE]) continue; -- cgit v1.2.3 From 4b275d7efa1c4412f0d572fcd7f78ed0919370b3 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 23 Aug 2011 22:54:33 +0000 Subject: bridge: Pseudo-header required for the checksum of ICMPv6 Checksum of ICMPv6 is not properly computed because the pseudo header is not used. Thus, the MLD packet gets dropped by the bridge. Signed-off-by: Zheng Yan Reported-by: Ang Way Chuang Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2d85ca7111d3..22d2d1af1c83 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1520,16 +1520,23 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, err = pskb_trim_rcsum(skb2, len); if (err) goto out; + err = -EINVAL; } + ip6h = ipv6_hdr(skb2); + switch (skb2->ip_summed) { case CHECKSUM_COMPLETE: - if (!csum_fold(skb2->csum)) + if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, skb2->len, + IPPROTO_ICMPV6, skb2->csum)) break; /*FALLTHROUGH*/ case CHECKSUM_NONE: - skb2->csum = 0; - if (skb_checksum_complete(skb2)) + skb2->csum = ~csum_unfold(csum_ipv6_magic(&ip6h->saddr, + &ip6h->daddr, + skb2->len, + IPPROTO_ICMPV6, 0)); + if (__skb_checksum_complete(skb2)) goto out; } -- cgit v1.2.3 From 22df13319d1fec30b8f9bcaadc295829647109bb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 23 Aug 2011 19:57:05 +0000 Subject: bridge: fix a possible use after free br_multicast_ipv6_rcv() can call pskb_trim_rcsum() and therefore skb head can be reallocated. Cache icmp6_type field instead of dereferencing twice the struct icmp6hdr pointer. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/bridge/br_multicast.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 22d2d1af1c83..995cbe0ac0b2 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1456,7 +1456,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, { struct sk_buff *skb2; const struct ipv6hdr *ip6h; - struct icmp6hdr *icmp6h; + u8 icmp6_type; u8 nexthdr; unsigned len; int offset; @@ -1502,9 +1502,9 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, __skb_pull(skb2, offset); skb_reset_transport_header(skb2); - icmp6h = icmp6_hdr(skb2); + icmp6_type = icmp6_hdr(skb2)->icmp6_type; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_QUERY: case ICMPV6_MGM_REPORT: case ICMPV6_MGM_REDUCTION: @@ -1544,7 +1544,7 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br, BR_INPUT_SKB_CB(skb)->igmp = 1; - switch (icmp6h->icmp6_type) { + switch (icmp6_type) { case ICMPV6_MGM_REPORT: { struct mld_msg *mld; -- cgit v1.2.3 From 20e6074eb8e096b3a595c093d1cb222f378cd671 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 22 Aug 2011 19:32:42 +0000 Subject: arp: fix rcu lockdep splat in arp_process() Dave Jones reported a lockdep splat triggered by an arp_process() call from parp_redo(). Commit faa9dcf793be (arp: RCU changes) is the origin of the bug, since it assumed arp_process() was called under rcu_read_lock(), which is not true in this particular path. Instead of adding rcu_read_lock() in parp_redo(), I chose to add it in neigh_proxy_process() to take care of IPv6 side too. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- include/linux/inetdevice.h:209 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 4 locks held by setfiles/2123: #0: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: [] walk_component+0x1ef/0x3e8 #1: (&isec->lock){+.+.+.}, at: [] inode_doinit_with_dentry+0x3f/0x41f #2: (&tbl->proxy_timer){+.-...}, at: [] run_timer_softirq+0x157/0x372 #3: (class){+.-...}, at: [] neigh_proxy_process +0x36/0x103 stack backtrace: Pid: 2123, comm: setfiles Tainted: G W 3.1.0-0.rc2.git7.2.fc16.x86_64 #1 Call Trace: [] lockdep_rcu_dereference+0xa7/0xaf [] __in_dev_get_rcu+0x55/0x5d [] arp_process+0x25/0x4d7 [] parp_redo+0xe/0x10 [] neigh_proxy_process+0x9a/0x103 [] run_timer_softirq+0x218/0x372 [] ? run_timer_softirq+0x157/0x372 [] ? neigh_stat_seq_open+0x41/0x41 [] ? mark_held_locks+0x6d/0x95 [] __do_softirq+0x112/0x25a [] call_softirq+0x1c/0x30 [] do_softirq+0x4b/0xa2 [] irq_exit+0x5d/0xcf [] smp_apic_timer_interrupt+0x7c/0x8a [] apic_timer_interrupt+0x73/0x80 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? __slab_free+0x30/0x24c [] ? __slab_free+0x2e/0x24c [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] ? inode_doinit_with_dentry+0x2e9/0x41f [] kfree+0x108/0x131 [] inode_doinit_with_dentry+0x2e9/0x41f [] selinux_d_instantiate+0x1c/0x1e [] security_d_instantiate+0x21/0x23 [] d_instantiate+0x5c/0x61 [] d_splice_alias+0xbc/0xd2 [] ext4_lookup+0xba/0xeb [] d_alloc_and_lookup+0x45/0x6b [] walk_component+0x215/0x3e8 [] lookup_last+0x3b/0x3d [] path_lookupat+0x82/0x2af [] ? might_fault+0xa5/0xac [] ? might_fault+0x5c/0xac [] ? getname_flags+0x31/0x1ca [] do_path_lookup+0x28/0x97 [] user_path_at+0x59/0x96 [] ? cp_new_stat+0xf7/0x10d [] vfs_fstatat+0x44/0x6e [] vfs_lstat+0x1e/0x20 [] sys_newlstat+0x1a/0x33 [] ? trace_hardirqs_on_caller+0x121/0x158 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] system_call_fastpath+0x16/0x1b Reported-by: Dave Jones Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0bb203..1334d7e56f02 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1319,11 +1319,15 @@ static void neigh_proxy_process(unsigned long arg) if (tdif <= 0) { struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); - if (tbl->proxy_redo && netif_running(dev)) + if (tbl->proxy_redo && netif_running(dev)) { + rcu_read_lock(); tbl->proxy_redo(skb); - else + rcu_read_unlock(); + } else { kfree_skb(skb); + } dev_put(dev); } else if (!sched_next || tdif < sched_next) -- cgit v1.2.3 From c6f59d13e24187ff95427a9f4a5a7e14fb8faf5a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2011 17:56:15 -0700 Subject: ibmveth: Fix leak when recycling skb and hypervisor returns error If h_add_logical_lan_buffer returns an error we need to free the skb. Signed-off-by: Anton Blanchard Cc: stable Signed-off-by: David S. Miller --- drivers/net/ibmveth.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index ba99af05bf62..3e6679269400 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -395,7 +395,7 @@ static inline struct sk_buff *ibmveth_rxq_get_buffer(struct ibmveth_adapter *ada } /* recycle the current buffer on the rx queue */ -static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) +static int ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) { u32 q_index = adapter->rx_queue.index; u64 correlator = adapter->rx_queue.queue_addr[q_index].correlator; @@ -403,6 +403,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) unsigned int index = correlator & 0xffffffffUL; union ibmveth_buf_desc desc; unsigned long lpar_rc; + int ret = 1; BUG_ON(pool >= IBMVETH_NUM_BUFF_POOLS); BUG_ON(index >= adapter->rx_buff_pool[pool].size); @@ -410,7 +411,7 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) if (!adapter->rx_buff_pool[pool].active) { ibmveth_rxq_harvest_buffer(adapter); ibmveth_free_buffer_pool(adapter, &adapter->rx_buff_pool[pool]); - return; + goto out; } desc.fields.flags_len = IBMVETH_BUF_VALID | @@ -423,12 +424,16 @@ static void ibmveth_rxq_recycle_buffer(struct ibmveth_adapter *adapter) netdev_dbg(adapter->netdev, "h_add_logical_lan_buffer failed " "during recycle rc=%ld", lpar_rc); ibmveth_remove_buffer_from_pool(adapter, adapter->rx_queue.queue_addr[adapter->rx_queue.index].correlator); + ret = 0; } if (++adapter->rx_queue.index == adapter->rx_queue.num_slots) { adapter->rx_queue.index = 0; adapter->rx_queue.toggle = !adapter->rx_queue.toggle; } + +out: + return ret; } static void ibmveth_rxq_harvest_buffer(struct ibmveth_adapter *adapter) @@ -1084,8 +1089,9 @@ restart_poll: if (rx_flush) ibmveth_flush_buffer(skb->data, length + offset); + if (!ibmveth_rxq_recycle_buffer(adapter)) + kfree_skb(skb); skb = new_skb; - ibmveth_rxq_recycle_buffer(adapter); } else { ibmveth_rxq_harvest_buffer(adapter); skb_reserve(skb, offset); -- cgit v1.2.3 From bc909d9ddbf7778371e36a651d6e4194b1cc7d4c Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 24 Aug 2011 19:45:03 -0700 Subject: sendmmsg/sendmsg: fix unsafe user pointer access Dereferencing a user pointer directly from kernel-space without going through the copy_from_user family of functions is a bad idea. Two of such usages can be found in the sendmsg code path called from sendmmsg, added by commit c71d8ebe7a4496fb7231151cb70a6baa0cb56f9a upstream. commit 5b47b8038f183b44d2d8ff1c7d11a5c1be706b34 in the 3.0-stable tree. Usages are performed through memcmp() and memcpy() directly. Fix those by using the already copied msg_sys structure instead of the __user *msg structure. Note that msg_sys can be set to NULL by verify_compat_iovec() or verify_iovec(), which requires additional NULL pointer checks. Signed-off-by: Mathieu Desnoyers Signed-off-by: David Goulet CC: Tetsuo Handa CC: Anton Blanchard CC: David S. Miller CC: stable Signed-off-by: David S. Miller --- net/socket.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/socket.c b/net/socket.c index 24a77400b65e..ffe92ca32f2a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1965,8 +1965,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, * used_address->name_len is initialized to UINT_MAX so that the first * destination address never matches. */ - if (used_address && used_address->name_len == msg_sys->msg_namelen && - !memcmp(&used_address->name, msg->msg_name, + if (used_address && msg_sys->msg_name && + used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg_sys->msg_name, used_address->name_len)) { err = sock_sendmsg_nosec(sock, msg_sys, total_len); goto out_freectl; @@ -1978,8 +1979,9 @@ static int __sys_sendmsg(struct socket *sock, struct msghdr __user *msg, */ if (used_address && err >= 0) { used_address->name_len = msg_sys->msg_namelen; - memcpy(&used_address->name, msg->msg_name, - used_address->name_len); + if (msg_sys->msg_name) + memcpy(&used_address->name, msg_sys->msg_name, + used_address->name_len); } out_freectl: -- cgit v1.2.3 From 5ef56c8fecedf403a346d02140e52a072d693d6b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:42:51 +1000 Subject: md: report failure if a 'set faulty' request doesn't. Sometimes a device will refuse to be set faulty. e.g. RAID1 will never let the last working device become faulty. So check if "md_error()" did manage to set the faulty flag and fail with EBUSY if it didn't. Resolves-Debian-Bug: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=601198 Reported-by: Mike Hommey Signed-off-by: NeilBrown --- drivers/md/md.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8e221a20f5d9..1cd9bfb45e9a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2561,7 +2561,10 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); - err = 0; + if (test_bit(Faulty, &rdev->flags)) + err = 0; + else + err = -EBUSY; } else if (cmd_match(buf, "remove")) { if (rdev->raid_disk >= 0) err = -EBUSY; @@ -5983,6 +5986,8 @@ static int set_disk_faulty(mddev_t *mddev, dev_t dev) return -ENODEV; md_error(mddev, rdev); + if (!test_bit(Faulty, &rdev->flags)) + return -EBUSY; return 0; } -- cgit v1.2.3 From aeb9b211849621f592288ed5ad694de9eeaae87a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:43:08 +1000 Subject: md: ensure changes to 'write-mostly' are reflected in metadata. The 'write-mostly' flag can be changed through sysfs. With 0.90 metadata, those changes are reflected in the metadata. For 1.x metadata, they aren't. So fix super_1_sync to record 'write-mostly' status. Signed-off-by: NeilBrown --- drivers/md/md.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/md.c b/drivers/md/md.c index 1cd9bfb45e9a..9a880239219d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1738,6 +1738,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + if (test_bit(WriteMostly, &rdev->flags)) + sb->devflags |= WriteMostly1; + else + sb->devflags &= ~WriteMostly1; + if (mddev->bitmap && mddev->bitmap_info.file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); -- cgit v1.2.3 From a5bf4df0c88b88d34b6f0e3bc8a402dac7d14611 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Aug 2011 14:43:34 +1000 Subject: md: use REQ_NOIDLE flag in md_super_write() Queue idling is used for the anticipation of immediate sequencial I/O's but md_super_write() is a kind of one- shot operation, coupled with md_super_wait(), so the idling in this case will be just a waste of time. Specifying REQ_NOIDLE prevents it. Instead of adding the flag to submit_bio() directly, use pre-defined macro WRITE_FLUSH_FUA. Signed-off-by: Namhyung Kim Signed-off-by: NeilBrown --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 9a880239219d..aca611711264 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -848,7 +848,7 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, bio->bi_end_io = super_written; atomic_inc(&mddev->pending_writes); - submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio); + submit_bio(WRITE_FLUSH_FUA, bio); } void md_super_wait(mddev_t *mddev) -- cgit v1.2.3 From 1b6afa17581027218088a18a9ceda600e0ddba7a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 25 Aug 2011 14:43:53 +1000 Subject: md/linear: avoid corrupting structure while waiting for rcu_free to complete. I don't know what I was thinking putting 'rcu' after a dynamically sized array! The array could still be in use when we call rcu_free() (That is the point) so we mustn't corrupt it. Cc: stable@kernel.org Signed-off-by: NeilBrown --- drivers/md/linear.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/linear.h b/drivers/md/linear.h index 0ce29b61605a..2f2da05b2ce9 100644 --- a/drivers/md/linear.h +++ b/drivers/md/linear.h @@ -10,9 +10,9 @@ typedef struct dev_info dev_info_t; struct linear_private_data { + struct rcu_head rcu; sector_t array_sectors; dev_info_t disks[0]; - struct rcu_head rcu; }; -- cgit v1.2.3 From 35d851df23b093ee027f827fed2213ae5e88fc7a Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 25 Aug 2011 14:21:37 +0200 Subject: HID: magicmouse: ignore 'ivalid report id' while switching modes, v2 This is basically a more generic respin of 23746a6 ("HID: magicmouse: ignore 'ivalid report id' while switching modes") which got reverted later by c3a492. It turns out that on some configurations, this is actually still the case and we are not able to detect in runtime. The device reponds with 'invalid report id' when feature report switching it into multitouch mode is sent to it. This has been silently ignored before 0825411ade ("HID: bt: Wait for ACK on Sent Reports"), but since this commit, it propagates -EIO from the _raw callback . So let the driver ignore -EIO as response to 0xd7,0x01 report, as that's how the device reacts in normal mode. Sad, but following reality. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=35022 Reported-by: Chase Douglas Reported-by: Jaikumar Ganesh Tested-by: Chase Douglas Tested-by: Jaikumar Ganesh Signed-off-by: Jiri Kosina --- drivers/hid/hid-magicmouse.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/hid/hid-magicmouse.c b/drivers/hid/hid-magicmouse.c index b5bdab3299bc..f0fbd7bd239e 100644 --- a/drivers/hid/hid-magicmouse.c +++ b/drivers/hid/hid-magicmouse.c @@ -537,9 +537,17 @@ static int magicmouse_probe(struct hid_device *hdev, } report->size = 6; + /* + * Some devices repond with 'invalid report id' when feature + * report switching it into multitouch mode is sent to it. + * + * This results in -EIO from the _raw low-level transport callback, + * but there seems to be no other way of switching the mode. + * Thus the super-ugly hacky success check below. + */ ret = hdev->hid_output_raw_report(hdev, feature, sizeof(feature), HID_FEATURE_REPORT); - if (ret != sizeof(feature)) { + if (ret != -EIO && ret != sizeof(feature)) { hid_err(hdev, "unable to request touch data (%d)\n", ret); goto err_stop_hw; } -- cgit v1.2.3 From 468c5458856236cde6df1b0654d32bf6625349a5 Mon Sep 17 00:00:00 2001 From: David Henningsson Date: Thu, 25 Aug 2011 13:16:02 +0200 Subject: ALSA: hda: Conexant: Allow different output types to share DAC Headphones has stopped working for the original reported (a regression compared to 2.6.38). This is because Speaker and Headphones share the same DAC, in which case no Headphones volume control was created. This patch fixes so that both Speaker and Headphones volume controls are created in such scenario. BugLink: http://bugs.launchpad.net/bugs/817943 Signed-off-by: David Henningsson Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_conexant.c | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 5616444a8ed7..7696d05b9356 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -3372,18 +3372,26 @@ static int fill_cx_auto_dacs(struct hda_codec *codec, hda_nid_t *dacs) /* fill pin_dac_pair list from the pin and dac list */ static int fill_dacs_for_pins(struct hda_codec *codec, hda_nid_t *pins, int num_pins, hda_nid_t *dacs, int *rest, - struct pin_dac_pair *filled, int type) + struct pin_dac_pair *filled, int nums, + int type) { - int i, nums; + int i, start = nums; - nums = 0; - for (i = 0; i < num_pins; i++) { + for (i = 0; i < num_pins; i++, nums++) { filled[nums].pin = pins[i]; filled[nums].type = type; filled[nums].dac = get_unassigned_dac(codec, pins[i], dacs, rest); - if (!filled[nums].dac && i > 0 && filled[0].dac) + if (filled[nums].dac) + continue; + if (filled[start].dac && get_connection_index(codec, pins[i], filled[start].dac) >= 0) { + filled[nums].dac = filled[start].dac | DAC_SLAVE_FLAG; + continue; + } + if (filled[0].dac && get_connection_index(codec, pins[i], filled[0].dac) >= 0) { filled[nums].dac = filled[0].dac | DAC_SLAVE_FLAG; - nums++; + continue; + } + snd_printdd("Failed to find a DAC for pin 0x%x", pins[i]); } return nums; } @@ -3399,14 +3407,14 @@ static void cx_auto_parse_output(struct hda_codec *codec) rest = fill_cx_auto_dacs(codec, dacs); /* parse all analog output pins */ nums = fill_dacs_for_pins(codec, cfg->line_out_pins, cfg->line_outs, - dacs, &rest, spec->dac_info, - AUTO_PIN_LINE_OUT); - nums += fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs, - dacs, &rest, spec->dac_info + nums, - AUTO_PIN_HP_OUT); - nums += fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs, - dacs, &rest, spec->dac_info + nums, - AUTO_PIN_SPEAKER_OUT); + dacs, &rest, spec->dac_info, 0, + AUTO_PIN_LINE_OUT); + nums = fill_dacs_for_pins(codec, cfg->hp_pins, cfg->hp_outs, + dacs, &rest, spec->dac_info, nums, + AUTO_PIN_HP_OUT); + nums = fill_dacs_for_pins(codec, cfg->speaker_pins, cfg->speaker_outs, + dacs, &rest, spec->dac_info, nums, + AUTO_PIN_SPEAKER_OUT); spec->dac_info_filled = nums; /* fill multiout struct */ for (i = 0; i < nums; i++) { @@ -4173,9 +4181,11 @@ static int try_add_pb_volume(struct hda_codec *codec, hda_nid_t dac, hda_nid_t pin, const char *name, int idx) { unsigned int caps; - caps = query_amp_caps(codec, dac, HDA_OUTPUT); - if (caps & AC_AMPCAP_NUM_STEPS) - return cx_auto_add_pb_volume(codec, dac, name, idx); + if (dac && !(dac & DAC_SLAVE_FLAG)) { + caps = query_amp_caps(codec, dac, HDA_OUTPUT); + if (caps & AC_AMPCAP_NUM_STEPS) + return cx_auto_add_pb_volume(codec, dac, name, idx); + } caps = query_amp_caps(codec, pin, HDA_OUTPUT); if (caps & AC_AMPCAP_NUM_STEPS) return cx_auto_add_pb_volume(codec, pin, name, idx); @@ -4198,8 +4208,6 @@ static int cx_auto_build_output_controls(struct hda_codec *codec) const char *label; int idx, type; hda_nid_t dac = spec->dac_info[i].dac; - if (!dac || (dac & DAC_SLAVE_FLAG)) - continue; type = spec->dac_info[i].type; if (type == AUTO_PIN_LINE_OUT) type = spec->autocfg.line_out_type; -- cgit v1.2.3 From 64584eb9cde5f3c5a07f24b2e7cd38f1157be181 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 25 Aug 2011 15:31:05 +0200 Subject: PM / Runtime: Correct documentation of pm_runtime_irq_safe() The description of pm_runtime_irq_safe() has to be updated to follow the code after commit 02b2677 (PM / Runtime: Allow _put_sync() from interrupts-disabled context). Signed-off-by: Rafael J. Wysocki Acked-by: Kevin Hilman --- Documentation/power/runtime_pm.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 4ce5450ab6e8..6066e3a6b9a9 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -431,8 +431,7 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: void pm_runtime_irq_safe(struct device *dev); - set the power.irq_safe flag for the device, causing the runtime-PM - suspend and resume callbacks (but not the idle callback) to be invoked - with interrupts disabled + callbacks to be invoked with interrupts off void pm_runtime_mark_last_busy(struct device *dev); - set the power.last_busy field to the current time -- cgit v1.2.3 From ff35336d3efd1ec4015b56f690191ed69730cbb0 Mon Sep 17 00:00:00 2001 From: Kevin Hilman Date: Thu, 25 Aug 2011 15:31:14 +0200 Subject: OMAP: omap_device: only override _noirq methods, not normal suspend/resume Commit c03f007a8bf0e092caeb6856a5c8a850df10b974 (OMAP: PM: omap_device: add system PM methods for PM domain handling) mistakenly used SET_SYSTEM_SLEEP_PM_OPS() when trying to configure custom methods for the PM domains noirq methods. Fix that by setting only the suspend_noirq and resume_noirq methods with custom versions. Note that all other PM domain methods (including the "normal" suspend/resume methods) are populated using USE_PLATFORM_PM_SLEEP_OPS, which configures them all to the default subsystem (platform_bus) methods. Reported-by: Santosh Shilimkar Tested-by: Santosh Shilimkar Signed-off-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- arch/arm/plat-omap/omap_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/plat-omap/omap_device.c b/arch/arm/plat-omap/omap_device.c index b6b409744954..9a6a53854911 100644 --- a/arch/arm/plat-omap/omap_device.c +++ b/arch/arm/plat-omap/omap_device.c @@ -622,7 +622,8 @@ static struct dev_pm_domain omap_device_pm_domain = { SET_RUNTIME_PM_OPS(_od_runtime_suspend, _od_runtime_resume, _od_runtime_idle) USE_PLATFORM_PM_SLEEP_OPS - SET_SYSTEM_SLEEP_PM_OPS(_od_suspend_noirq, _od_resume_noirq) + .suspend_noirq = _od_suspend_noirq, + .resume_noirq = _od_resume_noirq, } }; -- cgit v1.2.3 From 6d1db0777981e1626ae71243984ac300b61789ff Mon Sep 17 00:00:00 2001 From: Clemens Werther Date: Thu, 25 Aug 2011 15:35:14 +0200 Subject: HID: add support for HuiJia USB Gamepad connector Create each gamepad as a separate joystick Signed-off-by: Clemens Werther Signed-off-by: Jiri Kosina --- drivers/hid/hid-ids.h | 1 + drivers/hid/usbhid/hid-quirks.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 7d27d2b0445a..7484e1b67249 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -277,6 +277,7 @@ #define USB_DEVICE_ID_PENPOWER 0x00f4 #define USB_VENDOR_ID_GREENASIA 0x0e8f +#define USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD 0x3013 #define USB_VENDOR_ID_GRETAGMACBETH 0x0971 #define USB_DEVICE_ID_GRETAGMACBETH_HUEY 0x2005 diff --git a/drivers/hid/usbhid/hid-quirks.c b/drivers/hid/usbhid/hid-quirks.c index 4bdb5d46c52c..3146fdcda272 100644 --- a/drivers/hid/usbhid/hid-quirks.c +++ b/drivers/hid/usbhid/hid-quirks.c @@ -47,6 +47,7 @@ static const struct hid_blacklist { { USB_VENDOR_ID_AFATECH, USB_DEVICE_ID_AFATECH_AF9016, HID_QUIRK_FULLSPEED_INTERVAL }, { USB_VENDOR_ID_ETURBOTOUCH, USB_DEVICE_ID_ETURBOTOUCH, HID_QUIRK_MULTI_INPUT }, + { USB_VENDOR_ID_GREENASIA, USB_DEVICE_ID_GREENASIA_DUAL_USB_JOYPAD, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_PANTHERLORD, USB_DEVICE_ID_PANTHERLORD_TWIN_USB_JOYSTICK, HID_QUIRK_MULTI_INPUT | HID_QUIRK_SKIP_OUTPUT_REPORTS }, { USB_VENDOR_ID_PLAYDOTCOM, USB_DEVICE_ID_PLAYDOTCOM_EMS_USBII, HID_QUIRK_MULTI_INPUT }, { USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS, HID_QUIRK_MULTI_INPUT }, -- cgit v1.2.3 From 5166793feb688a884832ca656f161f683be8f04c Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 1 Aug 2011 12:42:14 -0400 Subject: arm: fix compile failure in orion5x/dns323-setup.c Upstream commit d5341942d784134f2997b3ff82cd63cf71d1f932 "PCI: Make the struct pci_dev * argument of pci_fixup_irqs const." leaked an extra "const" into an actual call site (vs a proto/decl) which causes this: arch/arm/mach-orion5x/dns323-setup.c: In function 'dns323_pci_map_irq': arch/arm/mach-orion5x/dns323-setup.c:80: error: expected expression before 'const' arch/arm/mach-orion5x/dns323-setup.c:80: error: too few arguments to function 'orion5x_pci_map_irq' make[3]: *** [arch/arm/mach-orion5x/dns323-setup.o] Error 1 Signed-off-by: Paul Gortmaker Acked-by: Nicolas Pitre Acked-by: Ralf Baechle --- arch/arm/mach-orion5x/dns323-setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index a6eddae82a0b..c105556a0ee1 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -77,7 +77,7 @@ static int __init dns323_pci_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) /* * Check for devices with hard-wired IRQs. */ - irq = orion5x_pci_map_irq(const dev, slot, pin); + irq = orion5x_pci_map_irq(dev, slot, pin); if (irq != -1) return irq; -- cgit v1.2.3 From 158c0c623ab57e4e1cf705ce64b8efddc1cf82dd Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Wed, 17 Aug 2011 17:29:38 +0800 Subject: ARM: mach-orion5x: add missing header file This patch fixed following building error: -- arch/arm/mach-orion5x/pci.c: In function 'orion5x_pci_sys_setup': arch/arm/mach-orion5x/pci.c:563:2: error: 'vga_base' undeclared (first use in this function) arch/arm/mach-orion5x/pci.c:563:2: note: each undeclared identifier is reported only once for each function it appears in make[1]: *** [arch/arm/mach-orion5x/pci.o] Error 1 make[1]: *** Waiting for unfinished jobs.... -- Signed-off-by: Bryan Wu Acked-by: Rob Herring --- arch/arm/mach-orion5x/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 28b8760ab9fa..bc4a920e26ee 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -14,6 +14,7 @@ #include #include #include +#include