aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/SubmitChecklist4
-rw-r--r--Documentation/SubmittingPatches3
-rw-r--r--Documentation/cpu-hotplug.txt9
-rw-r--r--Documentation/device-mapper/delay.txt26
-rw-r--r--Documentation/fb/arkfb.txt68
-rw-r--r--Documentation/fb/vt8623fb.txt64
-rw-r--r--Documentation/md.txt72
-rw-r--r--Documentation/power/userland-swsusp.txt26
-rw-r--r--Documentation/vm/slabinfo.c426
-rw-r--r--arch/avr32/Makefile2
-rw-r--r--arch/avr32/kernel/process.c6
-rw-r--r--arch/avr32/kernel/ptrace.c2
-rw-r--r--arch/avr32/kernel/syscall_table.S1
-rw-r--r--arch/avr32/kernel/traps.c2
-rw-r--r--arch/avr32/kernel/vmlinux.lds.c2
-rw-r--r--arch/avr32/mach-at32ap/clock.c2
-rw-r--r--arch/avr32/mm/dma-coherent.c12
-rw-r--r--arch/blackfin/kernel/asm-offsets.c2
-rw-r--r--arch/blackfin/kernel/ptrace.c6
-rw-r--r--arch/frv/Kconfig6
-rw-r--r--arch/frv/kernel/process.c4
-rw-r--r--arch/frv/mm/pgalloc.c22
-rw-r--r--arch/h8300/kernel/asm-offsets.c2
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/i386/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/i386/kernel/cpu/transmeta.c6
-rw-r--r--arch/i386/kernel/cpuid.c2
-rw-r--r--arch/i386/kernel/microcode.c59
-rw-r--r--arch/i386/kernel/msr.c2
-rw-r--r--arch/i386/kernel/traps.c2
-rw-r--r--arch/i386/mach-generic/probe.c2
-rw-r--r--arch/i386/mach-voyager/voyager_basic.c4
-rw-r--r--arch/i386/pci/init.c2
-rw-r--r--arch/ia64/kernel/err_inject.c2
-rw-r--r--arch/ia64/kernel/mca.c2
-rw-r--r--arch/ia64/kernel/palinfo.c2
-rw-r--r--arch/ia64/kernel/salinfo.c2
-rw-r--r--arch/ia64/kernel/topology.c2
-rw-r--r--arch/m68knommu/kernel/asm-offsets.c2
-rw-r--r--arch/mips/kernel/asm-offsets.c2
-rw-r--r--arch/mips/kernel/smtc.c2
-rw-r--r--arch/parisc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/asm-offsets.c2
-rw-r--r--arch/powerpc/kernel/sysfs.c2
-rw-r--r--arch/powerpc/mm/numa.c3
-rw-r--r--arch/ppc/kernel/asm-offsets.c2
-rw-r--r--arch/s390/appldata/appldata_base.c2
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/smp.c2
-rw-r--r--arch/sparc/kernel/asm-offsets.c2
-rw-r--r--arch/sparc64/kernel/traps.c1
-rw-r--r--arch/um/Kconfig16
-rw-r--r--arch/um/Kconfig.scsi58
-rw-r--r--arch/um/kernel/skas/process.c9
-rw-r--r--arch/um/os-Linux/process.c2
-rw-r--r--arch/um/os-Linux/skas/mem.c2
-rw-r--r--arch/um/os-Linux/skas/process.c2
-rw-r--r--arch/v850/kernel/asm-offsets.c2
-rw-r--r--arch/x86_64/kernel/irq.c2
-rw-r--r--arch/x86_64/kernel/mce.c2
-rw-r--r--arch/x86_64/kernel/mce_amd.c2
-rw-r--r--arch/x86_64/kernel/vsyscall.c2
-rw-r--r--arch/xtensa/kernel/asm-offsets.c2
-rw-r--r--block/as-iosched.c2
-rw-r--r--block/genhd.c53
-rw-r--r--block/ll_rw_blk.c9
-rw-r--r--drivers/acpi/sleep/main.c67
-rw-r--r--drivers/acpi/sleep/proc.c2
-rw-r--r--drivers/ata/libata-core.c8
-rw-r--r--drivers/base/topology.c3
-rw-r--r--drivers/block/loop.c6
-rw-r--r--drivers/block/nbd.c15
-rw-r--r--drivers/char/hw_random/Kconfig14
-rw-r--r--drivers/char/hw_random/Makefile1
-rw-r--r--drivers/char/hw_random/pasemi-rng.c156
-rw-r--r--drivers/char/pcmcia/Kconfig1
-rw-r--r--drivers/char/pcmcia/cm4000_cs.c44
-rw-r--r--drivers/char/pcmcia/cm4040_cs.c7
-rw-r--r--drivers/char/tty_io.c14
-rw-r--r--drivers/cpufreq/cpufreq.c3
-rw-r--r--drivers/cpufreq/cpufreq_stats.c2
-rw-r--r--drivers/hwmon/coretemp.c2
-rw-r--r--drivers/i2c/chips/tps65010.c2
-rw-r--r--drivers/infiniband/hw/ehca/ehca_irq.c6
-rw-r--r--drivers/kvm/kvm_main.c3
-rw-r--r--drivers/mca/mca-bus.c28
-rw-r--r--drivers/mca/mca-driver.c13
-rw-r--r--drivers/md/Kconfig9
-rw-r--r--drivers/md/Makefile1
-rw-r--r--drivers/md/dm-bio-list.h26
-rw-r--r--drivers/md/dm-crypt.c91
-rw-r--r--drivers/md/dm-delay.c383
-rw-r--r--drivers/md/dm-exception-store.c54
-rw-r--r--drivers/md/dm-hw-handler.h1
-rw-r--r--drivers/md/dm-io.c232
-rw-r--r--drivers/md/dm-io.h83
-rw-r--r--drivers/md/dm-log.c77
-rw-r--r--drivers/md/dm-mpath.c3
-rw-r--r--drivers/md/dm-raid1.c187
-rw-r--r--drivers/md/dm-table.c10
-rw-r--r--drivers/md/dm.c1
-rw-r--r--drivers/md/kcopyd.c28
-rw-r--r--drivers/md/md.c186
-rw-r--r--drivers/md/raid1.c1
-rw-r--r--drivers/md/raid5.c6
-rw-r--r--drivers/mmc/core/core.c4
-rw-r--r--drivers/net/e1000/e1000_main.c2
-rw-r--r--drivers/net/phy/phy.c6
-rw-r--r--drivers/net/tg3.c11
-rw-r--r--drivers/net/tg3.h2
-rw-r--r--drivers/spi/atmel_spi.c5
-rw-r--r--drivers/usb/atm/usbatm.c2
-rw-r--r--drivers/video/Kconfig28
-rw-r--r--drivers/video/Makefile2
-rw-r--r--drivers/video/arkfb.c1200
-rw-r--r--drivers/video/fbmem.c4
-rw-r--r--drivers/video/nvidia/nv_hw.c7
-rw-r--r--drivers/video/nvidia/nvidia.c1
-rw-r--r--drivers/video/s3fb.c19
-rw-r--r--drivers/video/svgalib.c17
-rw-r--r--drivers/video/vt8623fb.c927
-rw-r--r--fs/affs/file.c6
-rw-r--r--fs/afs/Makefile3
-rw-r--r--fs/afs/afs_fs.h2
-rw-r--r--fs/afs/callback.c9
-rw-r--r--fs/afs/dir.c19
-rw-r--r--fs/afs/file.c107
-rw-r--r--fs/afs/fsclient.c369
-rw-r--r--fs/afs/inode.c70
-rw-r--r--fs/afs/internal.h95
-rw-r--r--fs/afs/main.c2
-rw-r--r--fs/afs/misc.c1
-rw-r--r--fs/afs/mntpt.c5
-rw-r--r--fs/afs/rxrpc.c80
-rw-r--r--fs/afs/security.c12
-rw-r--r--fs/afs/server.c3
-rw-r--r--fs/afs/super.c5
-rw-r--r--fs/afs/vnode.c121
-rw-r--r--fs/afs/write.c835
-rw-r--r--fs/aio.c7
-rw-r--r--fs/binfmt_misc.c13
-rw-r--r--fs/buffer.c58
-rw-r--r--fs/configfs/file.c33
-rw-r--r--fs/direct-io.c8
-rw-r--r--fs/ext3/inode.c12
-rw-r--r--fs/mpage.c15
-rw-r--r--fs/namei.c8
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c14
-rw-r--r--fs/nfsd/nfs3proc.c2
-rw-r--r--fs/nfsd/nfs3xdr.c71
-rw-r--r--fs/nfsd/nfs4acl.c17
-rw-r--r--fs/nfsd/nfs4state.c2
-rw-r--r--fs/nfsd/nfsfh.c56
-rw-r--r--fs/nfsd/nfsproc.c2
-rw-r--r--fs/nfsd/nfsxdr.c53
-rw-r--r--fs/reiserfs/file.c39
-rw-r--r--fs/reiserfs/inode.c13
-rw-r--r--fs/sysfs/file.c33
-rw-r--r--fs/xfs/xfs_mount.c3
-rw-r--r--include/asm-alpha/smp.h1
-rw-r--r--include/asm-alpha/thread_info.h8
-rw-r--r--include/asm-arm/arch-at91/cpu.h6
-rw-r--r--include/asm-avr32/arch-at32ap/cpu.h33
-rw-r--r--include/asm-avr32/setup.h2
-rw-r--r--include/asm-avr32/unistd.h4
-rw-r--r--include/asm-blackfin/processor.h6
-rw-r--r--include/asm-blackfin/system.h4
-rw-r--r--include/asm-frv/tlb.h4
-rw-r--r--include/asm-i386/mmzone.h6
-rw-r--r--include/asm-i386/msr.h56
-rw-r--r--include/asm-i386/paravirt.h5
-rw-r--r--include/asm-i386/smp.h37
-rw-r--r--include/asm-i386/thread_info.h2
-rw-r--r--include/asm-ia64/smp.h6
-rw-r--r--include/asm-ia64/thread_info.h2
-rw-r--r--include/asm-m32r/smp.h6
-rw-r--r--include/asm-m68k/thread_info.h6
-rw-r--r--include/asm-mips/system.h2
-rw-r--r--include/asm-parisc/compat.h2
-rw-r--r--include/asm-powerpc/smp.h1
-rw-r--r--include/asm-s390/smp.h1
-rw-r--r--include/asm-sh/cpu-sh3/dma.h2
-rw-r--r--include/asm-sh/cpu-sh4/dma-sh7780.h2
-rw-r--r--include/asm-sh/cpu-sh4/dma.h2
-rw-r--r--include/asm-sparc/smp.h1
-rw-r--r--include/asm-sparc64/smp.h1
-rw-r--r--include/asm-um/required-features.h9
-rw-r--r--include/asm-um/smp.h4
-rw-r--r--include/asm-x86_64/smp.h14
-rw-r--r--include/asm-x86_64/system.h2
-rw-r--r--include/asm-x86_64/thread_info.h2
-rw-r--r--include/linux/aio.h3
-rw-r--r--include/linux/blkdev.h2
-rw-r--r--include/linux/clocksource.h3
-rw-r--r--include/linux/compat.h3
-rw-r--r--include/linux/compiler-gcc.h1
-rw-r--r--include/linux/compiler-gcc3.h6
-rw-r--r--include/linux/compiler-gcc4.h3
-rw-r--r--include/linux/compiler.h21
-rw-r--r--include/linux/fb.h2
-rw-r--r--include/linux/futex.h42
-rw-r--r--include/linux/genhd.h1
-rw-r--r--include/linux/gfp.h6
-rw-r--r--include/linux/highmem.h27
-rw-r--r--include/linux/init_task.h2
-rw-r--r--include/linux/kthread.h3
-rw-r--r--include/linux/ktime.h6
-rw-r--r--include/linux/mca.h2
-rw-r--r--include/linux/mmzone.h3
-rw-r--r--include/linux/module.h3
-rw-r--r--include/linux/mutex.h5
-rw-r--r--include/linux/nfs4_acl.h1
-rw-r--r--include/linux/notifier.h66
-rw-r--r--include/linux/pm.h31
-rw-r--r--include/linux/raid/md_k.h1
-rw-r--r--include/linux/relay.h3
-rw-r--r--include/linux/sched.h9
-rw-r--r--include/linux/signal.h125
-rw-r--r--include/linux/smp.h1
-rw-r--r--include/linux/sunrpc/svc.h19
-rw-r--r--include/linux/sunrpc/svcsock.h3
-rw-r--r--include/linux/suspend.h24
-rw-r--r--include/linux/svga.h2
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/vmstat.h3
-rw-r--r--include/linux/workqueue.h95
-rw-r--r--init/Kconfig24
-rw-r--r--init/do_mounts.c7
-rw-r--r--init/main.c5
-rw-r--r--kernel/configs.c15
-rw-r--r--kernel/cpu.c66
-rw-r--r--kernel/cpuset.c7
-rw-r--r--kernel/exit.c18
-rw-r--r--kernel/fork.c4
-rw-r--r--kernel/futex.c988
-rw-r--r--kernel/futex_compat.c22
-rw-r--r--kernel/hrtimer.c2
-rw-r--r--kernel/irq/handle.c1
-rw-r--r--kernel/kmod.c6
-rw-r--r--kernel/kthread.c113
-rw-r--r--kernel/mutex.c8
-rw-r--r--kernel/power/disk.c195
-rw-r--r--kernel/power/main.c42
-rw-r--r--kernel/power/power.h7
-rw-r--r--kernel/power/snapshot.c2
-rw-r--r--kernel/power/user.c13
-rw-r--r--kernel/profile.c4
-rw-r--r--kernel/rcupdate.c2
-rw-r--r--kernel/relay.c37
-rw-r--r--kernel/rtmutex.c41
-rw-r--r--kernel/rtmutex_common.h34
-rw-r--r--kernel/sched.c38
-rw-r--r--kernel/signal.c140
-rw-r--r--kernel/softirq.c4
-rw-r--r--kernel/softlockup.c4
-rw-r--r--kernel/sys.c96
-rw-r--r--kernel/sysctl.c12
-rw-r--r--kernel/time/clocksource.c51
-rw-r--r--kernel/time/timer_list.c25
-rw-r--r--kernel/timer.c14
-rw-r--r--kernel/workqueue.c783
-rw-r--r--lib/radix-tree.c2
-rw-r--r--mm/filemap.c2
-rw-r--r--mm/filemap_xip.c7
-rw-r--r--mm/hugetlb.c33
-rw-r--r--mm/page_alloc.c50
-rw-r--r--mm/slab.c41
-rw-r--r--mm/slub.c1050
-rw-r--r--mm/swap.c2
-rw-r--r--mm/truncate.c3
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/vmstat.c95
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/flow.c2
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c1
-rw-r--r--net/iucv/iucv.c6
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c15
-rw-r--r--net/sunrpc/rpc_pipe.c9
-rw-r--r--net/sunrpc/sunrpc_syms.c6
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcauth_unix.c10
-rw-r--r--net/sunrpc/svcsock.c34
-rwxr-xr-xscripts/kernel-doc7
-rw-r--r--scripts/mod/modpost.c1
285 files changed, 9123 insertions, 3127 deletions
diff --git a/Documentation/SubmitChecklist b/Documentation/SubmitChecklist
index bd23dc0bc0c..6491b2c45dd 100644
--- a/Documentation/SubmitChecklist
+++ b/Documentation/SubmitChecklist
@@ -80,3 +80,7 @@ kernel patches.
23: Tested after it has been merged into the -mm patchset to make sure
that it still works with all of the other queued patches and various
changes in the VM, VFS, and other subsystems.
+
+24: Avoid whitespace damage such as indenting with spaces or whitespace
+ at the end of lines. You can test this by feeding the patch to
+ "git apply --check --whitespace=error-all"
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index b0d0043f7c4..a417b25fb1a 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -363,7 +363,8 @@ area or subsystem of the kernel is being patched.
The "summary phrase" in the email's Subject should concisely
describe the patch which that email contains. The "summary
phrase" should not be a filename. Do not use the same "summary
-phrase" for every patch in a whole patch series.
+phrase" for every patch in a whole patch series (where a "patch
+series" is an ordered sequence of multiple, related patches).
Bear in mind that the "summary phrase" of your email becomes
a globally-unique identifier for that patch. It propagates
diff --git a/Documentation/cpu-hotplug.txt b/Documentation/cpu-hotplug.txt
index cc60d29b954..b6d24c22274 100644
--- a/Documentation/cpu-hotplug.txt
+++ b/Documentation/cpu-hotplug.txt
@@ -217,14 +217,17 @@ Q: What happens when a CPU is being logically offlined?
A: The following happen, listed in no particular order :-)
- A notification is sent to in-kernel registered modules by sending an event
- CPU_DOWN_PREPARE
+ CPU_DOWN_PREPARE or CPU_DOWN_PREPARE_FROZEN, depending on whether or not the
+ CPU is being offlined while tasks are frozen due to a suspend operation in
+ progress
- All process is migrated away from this outgoing CPU to a new CPU
- All interrupts targeted to this CPU is migrated to a new CPU
- timers/bottom half/task lets are also migrated to a new CPU
- Once all services are migrated, kernel calls an arch specific routine
__cpu_disable() to perform arch specific cleanup.
- Once this is successful, an event for successful cleanup is sent by an event
- CPU_DEAD.
+ CPU_DEAD (or CPU_DEAD_FROZEN if tasks are frozen due to a suspend while the
+ CPU is being offlined).
"It is expected that each service cleans up when the CPU_DOWN_PREPARE
notifier is called, when CPU_DEAD is called its expected there is nothing
@@ -242,9 +245,11 @@ A: This is what you would need in your kernel code to receive notifications.
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
foobar_online_action(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
foobar_dead_action(cpu);
break;
}
diff --git a/Documentation/device-mapper/delay.txt b/Documentation/device-mapper/delay.txt
new file mode 100644
index 00000000000..15adc55359e
--- /dev/null
+++ b/Documentation/device-mapper/delay.txt
@@ -0,0 +1,26 @@
+dm-delay
+========
+
+Device-Mapper's "delay" target delays reads and/or writes
+and maps them to different devices.
+
+Parameters:
+ <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
+
+With separate write parameters, the first set is only used for reads.
+Delays are specified in milliseconds.
+
+Example scripts
+===============
+[[
+#!/bin/sh
+# Create device delaying rw operation for 500ms
+echo "0 `blockdev --getsize $1` delay $1 0 500" | dmsetup create delayed
+]]
+
+[[
+#!/bin/sh
+# Create device delaying only write operation for 500ms and
+# splitting reads and writes to different devices $1 $2
+echo "0 `blockdev --getsize $1` delay $1 0 0 $2 0 500" | dmsetup create delayed
+]]
diff --git a/Documentation/fb/arkfb.txt b/Documentation/fb/arkfb.txt
new file mode 100644
index 00000000000..e8487a9d6a0
--- /dev/null
+++ b/Documentation/fb/arkfb.txt
@@ -0,0 +1,68 @@
+
+ arkfb - fbdev driver for ARK Logic chips
+ ========================================
+
+
+Supported Hardware
+==================
+
+ ARK 2000PV chip
+ ICS 5342 ramdac
+
+ - only BIOS initialized VGA devices supported
+ - probably not working on big endian
+
+
+Supported Features
+==================
+
+ * 4 bpp pseudocolor modes (with 18bit palette, two variants)
+ * 8 bpp pseudocolor mode (with 18bit palette)
+ * 16 bpp truecolor modes (RGB 555 and RGB 565)
+ * 24 bpp truecolor mode (RGB 888)
+ * 32 bpp truecolor mode (RGB 888)
+ * text mode (activated by bpp = 0)
+ * doublescan mode variant (not available in text mode)
+ * panning in both directions
+ * suspend/resume support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (i got maximum about 70 MHz, it is dependent on specific
+hardware). This limitation is not enforced by driver. Text mode supports 8bit
+wide fonts only (hardware limitation) and 16bit tall fonts (driver
+limitation). Unfortunately character attributes (like color) in text mode are
+broken for unknown reason, so its usefulness is limited.
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+ * secondary (not initialized by BIOS) device support
+ * big endian support
+ * DPMS support
+ * MMIO support
+ * interlaced mode variant
+ * support for fontwidths != 8 in 4 bpp modes
+ * support for fontheight != 16 in text mode
+ * hardware cursor
+ * vsync synchronization
+ * feature connector support
+ * acceleration support (8514-like 2D)
+
+
+Known bugs
+==========
+
+ * character attributes (and cursor) in text mode are broken
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/fb/vt8623fb.txt b/Documentation/fb/vt8623fb.txt
new file mode 100644
index 00000000000..f654576c56b
--- /dev/null
+++ b/Documentation/fb/vt8623fb.txt
@@ -0,0 +1,64 @@
+
+ vt8623fb - fbdev driver for graphics core in VIA VT8623 chipset
+ ===============================================================
+
+
+Supported Hardware
+==================
+
+ VIA VT8623 [CLE266] chipset and its graphics core
+ (known as CastleRock or Unichrome)
+
+I tested vt8623fb on VIA EPIA ML-6000
+
+
+Supported Features
+==================
+
+ * 4 bpp pseudocolor modes (with 18bit palette, two variants)
+ * 8 bpp pseudocolor mode (with 18bit palette)
+ * 16 bpp truecolor mode (RGB 565)
+ * 32 bpp truecolor mode (RGB 888)
+ * text mode (activated by bpp = 0)
+ * doublescan mode variant (not available in text mode)
+ * panning in both directions
+ * suspend/resume support
+ * DPMS support
+
+Text mode is supported even in higher resolutions, but there is limitation to
+lower pixclocks (maximum about 100 MHz). This limitation is not enforced by
+driver. Text mode supports 8bit wide fonts only (hardware limitation) and
+16bit tall fonts (driver limitation).
+
+There are two 4 bpp modes. First mode (selected if nonstd == 0) is mode with
+packed pixels, high nibble first. Second mode (selected if nonstd == 1) is mode
+with interleaved planes (1 byte interleave), MSB first. Both modes support
+8bit wide fonts only (driver limitation).
+
+Suspend/resume works on systems that initialize video card during resume and
+if device is active (for example used by fbcon).
+
+
+Missing Features
+================
+(alias TODO list)
+
+ * secondary (not initialized by BIOS) device support
+ * MMIO support
+ * interlaced mode variant
+ * support for fontwidths != 8 in 4 bpp modes
+ * support for fontheight != 16 in text mode
+ * hardware cursor
+ * video overlay support
+ * vsync synchronization
+ * acceleration support (8514-like 2D, busmaster transfers)
+
+
+Known bugs
+==========
+
+ * cursor disable in text mode doesn't work
+
+
+--
+Ondrej Zajicek <santiago@crfreenet.org>
diff --git a/Documentation/md.txt b/Documentation/md.txt
index 2202f5dc8ac..5818628207b 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -178,6 +178,21 @@ All md devices contain:
The size should be at least PAGE_SIZE (4k) and should be a power
of 2. This can only be set while assembling an array
+ layout
+ The "layout" for the array for the particular level. This is
+ simply a number that is interpretted differently by different
+ levels. It can be written while assembling an array.
+
+ reshape_position
+ This is either "none" or a sector number within the devices of
+ the array where "reshape" is up to. If this is set, the three
+ attributes mentioned above (raid_disks, chunk_size, layout) can
+ potentially have 2 values, an old and a new value. If these
+ values differ, reading the attribute returns
+ new (old)
+ and writing will effect the 'new' value, leaving the 'old'
+ unchanged.
+
component_size
For arrays with data redundancy (i.e. not raid0, linear, faulty,
multipath), all components must be the same size - or at least
@@ -193,11 +208,6 @@ All md devices contain:
1.2 (newer format in varying locations) or "none" indicating that
the kernel isn't managing metadata at all.
- layout
- The "layout" for the array for the particular level. This is
- simply a number that is interpretted differently by different
- levels. It can be written while assembling an array.
-
resync_start
The point at which resync should start. If no resync is needed,
this will be a very large number. At array creation it will
@@ -259,29 +269,6 @@ All md devices contain:
like active, but no writes have been seen for a while (safe_mode_delay).
- sync_speed_min
- sync_speed_max
- This are similar to /proc/sys/dev/raid/speed_limit_{min,max}
- however they only apply to the particular array.
- If no value has been written to these, of if the word 'system'
- is written, then the system-wide value is used. If a value,
- in kibibytes-per-second is written, then it is used.
- When the files are read, they show the currently active value
- followed by "(local)" or "(system)" depending on whether it is
- a locally set or system-wide value.
-
- sync_completed
- This shows the number of sectors that have been completed of
- whatever the current sync_action is, followed by the number of
- sectors in total that could need to be processed. The two
- numbers are separated by a '/' thus effectively showing one
- value, a fraction of the process that is complete.
-
- sync_speed
- This shows the current actual speed, in K/sec, of the current
- sync_action. It is averaged over the last 30 seconds.
-
-
As component devices are added to an md array, they appear in the 'md'
directory as new directories named
dev-XXX
@@ -412,6 +399,35 @@ also have
Note that the numbers are 'bit' numbers, not 'block' numbers.
They should be scaled by the bitmap_chunksize.
+ sync_speed_min
+ sync_speed_max
+ This are similar to /proc/sys/dev/raid/speed_limit_{min,max}
+ however they only apply to the particular array.
+ If no value has been written to these, of if the word 'system'
+ is written, then the system-wide value is used. If a value,
+ in kibibytes-per-second is written, then it is used.
+ When the files are read, they show the currently active value
+ followed by "(local)" or "(system)" depending on whether it is
+ a locally set or system-wide value.
+
+ sync_completed
+ This shows the number of sectors that have been completed of
+ whatever the current sync_action is, followed by the number of
+ sectors in total that could need to be processed. The two
+ numbers are separated by a '/' thus effectively showing one
+ value, a fraction of the process that is complete.
+
+ sync_speed
+ This shows the current actual speed, in K/sec, of the current
+ sync_action. It is averaged over the last 30 seconds.
+
+ suspend_lo
+ suspend_hi
+ The two values, given as numbers of sectors, indicate a range
+ within the array where IO will be blocked. This is currently
+ only supported for raid4/5/6.
+
+
Each active md device may also have attributes specific to the
personality module that manages it.
These are specific to the implementation of the module and could
diff --git a/Documentation/power/userland-swsusp.txt b/Documentation/power/userland-swsusp.txt
index 000556c932e..e00c6cf09e8 100644
--- a/Documentation/power/userland-swsusp.txt
+++ b/Documentation/power/userland-swsusp.txt
@@ -93,21 +93,23 @@ SNAPSHOT_S2RAM - suspend to RAM; using this call causes the kernel to
to resume the system from RAM if there's enough battery power or restore
its state on the basis of the saved suspend image otherwise)
-SNAPSHOT_PMOPS - enable the usage of the pmops->prepare, pmops->enter and
- pmops->finish methods (the in-kernel swsusp knows these as the "platform
- method") which are needed on many machines to (among others) speed up
- the resume by letting the BIOS skip some steps or to let the system
- recognise the correct state of the hardware after the resume (in
- particular on many machines this ensures that unplugged AC
- adapters get correctly detected and that kacpid does not run wild after
- the resume). The last ioctl() argument can take one of the three
- values, defined in kernel/power/power.h:
+SNAPSHOT_PMOPS - enable the usage of the hibernation_ops->prepare,
+ hibernate_ops->enter and hibernation_ops->finish methods (the in-kernel
+ swsusp knows these as the "platform method") which are needed on many
+ machines to (among others) speed up the resume by letting the BIOS skip
+ some steps or to let the system recognise the correct state of the
+ hardware after the resume (in particular on many machines this ensures
+ that unplugged AC adapters get correctly detected and that kacpid does
+ not run wild after the resume). The last ioctl() argument can take one
+ of the three values, defined in kernel/power/power.h:
PMOPS_PREPARE - make the kernel carry out the
- pm_ops->prepare(PM_SUSPEND_DISK) operation
+ hibernation_ops->prepare() operation
PMOPS_ENTER - make the kernel power off the system by calling
- pm_ops->enter(PM_SUSPEND_DISK)
+ hibernation_ops->enter()
PMOPS_FINISH - make the kernel carry out the
- pm_ops->finish(PM_SUSPEND_DISK) operation
+ hibernation_ops->finish() operation
+ Note that the actual constants are misnamed because they surface
+ internal kernel implementation details that have changed.
The device's read() operation can be used to transfer the snapshot image from
the kernel. It has the following limitations:
diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index 41710ccf3a2..686a8e04a4f 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -16,6 +16,7 @@
#include <stdarg.h>
#include <getopt.h>
#include <regex.h>
+#include <errno.h>
#define MAX_SLABS 500
#define MAX_ALIASES 500
@@ -41,12 +42,15 @@ struct aliasinfo {
} aliasinfo[MAX_ALIASES];
int slabs = 0;
+int actual_slabs = 0;
int aliases = 0;
int alias_targets = 0;
int highest_node = 0;
char buffer[4096];
+int show_empty = 0;
+int show_report = 0;
int show_alias = 0;
int show_slab = 0;
int skip_zero = 1;
@@ -59,6 +63,15 @@ int show_inverted = 0;
int show_single_ref = 0;
int show_totals = 0;
int sort_size = 0;
+int set_debug = 0;
+int show_ops = 0;
+
+/* Debug options */
+int sanity = 0;
+int redzone = 0;
+int poison = 0;
+int tracking = 0;
+int tracing = 0;
int page_size;
@@ -76,20 +89,33 @@ void fatal(const char *x, ...)
void usage(void)
{
- printf("slabinfo [-ahnpvtsz] [slab-regexp]\n"
+ printf("slabinfo 5/7/2007. (c) 2007 sgi. clameter@sgi.com\n\n"
+ "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
"-a|--aliases Show aliases\n"
+ "-d<options>|--debug=<options> Set/Clear Debug options\n"
+ "-e|--empty Show empty slabs\n"
+ "-f|--first-alias Show first alias\n"
"-h|--help Show usage information\n"
+ "-i|--inverted Inverted list\n"
+ "-l|--slabs Show slabs\n"
"-n|--numa Show NUMA information\n"
+ "-o|--ops Show kmem_cache_ops\n"
"-s|--shrink Shrink slabs\n"
- "-v|--validate Validate slabs\n"
+ "-r|--report Detailed report on single slabs\n"
+ "-S|--Size Sort by size\n"
"-t|--tracking Show alloc/free information\n"
"-T|--Totals Show summary information\n"
- "-l|--slabs Show slabs\n"
- "-S|--Size Sort by size\n"
+ "-v|--validate Validate slabs\n"
"-z|--zero Include empty slabs\n"
- "-f|--first-alias Show first alias\n"
- "-i|--inverted Inverted list\n"
"-1|--1ref Single reference\n"
+ "\nValid debug options (FZPUT may be combined)\n"
+ "a / A Switch on all debug options (=FZUP)\n"
+ "- Switch off all debug options\n"
+ "f / F Sanity Checks (SLAB_DEBUG_FREE)\n"
+ "z / Z Redzoning\n"
+ "p / P Poisoning\n"
+ "u / U Tracking\n"
+ "t / T Tracing\n"
);
}
@@ -143,11 +169,10 @@ unsigned long get_obj_and_str(char *name, char **x)
void set_obj(struct slabinfo *s, char *name, int n)
{
char x[100];
+ FILE *f;
sprintf(x, "%s/%s", s->name, name);
-
- FILE *f = fopen(x, "w");
-
+ f = fopen(x, "w");
if (!f)
fatal("Cannot write to %s\n", x);
@@ -155,6 +180,26 @@ void set_obj(struct slabinfo *s, char *name, int n)
fclose(f);
}
+unsigned long read_slab_obj(struct slabinfo *s, char *name)
+{
+ char x[100];
+ FILE *f;
+ int l;
+
+ sprintf(x, "%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
+
+
/*
* Put a size string together
*/
@@ -226,7 +271,7 @@ int line = 0;
void first_line(void)
{
- printf("Name Objects Objsize Space "
+ printf("Name Objects Objsize Space "
"Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n");
}
@@ -246,10 +291,7 @@ struct aliasinfo *find_one_alias(struct slabinfo *find)
return best;
}
}
- if (best)
- return best;
- fatal("Cannot find alias for %s\n", find->name);
- return NULL;
+ return best;
}
unsigned long slab_size(struct slabinfo *s)
@@ -257,6 +299,126 @@ unsigned long slab_size(struct slabinfo *s)
return s->slabs * (page_size << s->order);
}
+void slab_numa(struct slabinfo *s, int mode)
+{
+ int node;
+
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (!highest_node) {
+ printf("\n%s: No NUMA information available.\n", s->name);
+ return;
+ }
+
+ if (skip_zero && !s->slabs)
+ return;
+
+ if (!line) {
+ printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+ for(node = 0; node <= highest_node; node++)
+ printf(" %4d", node);
+ printf("\n----------------------");
+ for(node = 0; node <= highest_node; node++)
+ printf("-----");
+ printf("\n");
+ }
+ printf("%-21s ", mode ? "All slabs" : s->name);
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ if (mode) {
+ printf("%-21s ", "Partial slabs");
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa_partial[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ }
+ line++;
+}
+
+void show_tracking(struct slabinfo *s)
+{
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "alloc_calls"))
+ printf(buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "free_calls"))
+ printf(buffer);
+ else
+ printf("No Data\n");
+
+}
+
+void ops(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (read_slab_obj(s, "ops")) {
+ printf("\n%s: kmem_cache operations\n", s->name);
+ printf("--------------------------------------------\n");
+ printf(buffer);
+ } else
+ printf("\n%s has no kmem_cache operations\n", s->name);
+}
+
+const char *onoff(int x)
+{
+ if (x)
+ return "On ";
+ return "Off";
+}
+
+void report(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+ printf("\nSlabcache: %-20s Aliases: %2d Order : %2d\n", s->name, s->aliases, s->order);
+ if (s->hwcache_align)
+ printf("** Hardware cacheline aligned\n");
+ if (s->cache_dma)
+ printf("** Memory is allocated in a special DMA zone\n");
+ if (s->destroy_by_rcu)
+ printf("** Slabs are destroyed via RCU\n");
+ if (s->reclaim_account)
+ printf("** Reclaim accounting active\n");
+
+ printf("\nSizes (bytes) Slabs Debug Memory\n");
+ printf("------------------------------------------------------------------------\n");
+ printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n",
+ s->object_size, s->slabs, onoff(s->sanity_checks),
+ s->slabs * (page_size << s->order));
+ printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n",
+ s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+ onoff(s->red_zone), s->objects * s->object_size);
+ printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n",
+ page_size << s->order, s->partial, onoff(s->poison),
+ s->slabs * (page_size << s->order) - s->objects * s->object_size);
+ printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n",
+ s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+ (s->slab_size - s->object_size) * s->objects);
+ printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n",
+ s->align, s->objs_per_slab, onoff(s->trace),
+ ((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+ s->slabs);
+
+ ops(s);
+ show_tracking(s);
+ slab_numa(s, 1);
+}
void slabcache(struct slabinfo *s)
{
@@ -265,7 +427,18 @@ void slabcache(struct slabinfo *s)
char flags[20];
char *p = flags;
- if (skip_zero && !s->slabs)
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (actual_slabs == 1) {
+ report(s);
+ return;
+ }
+
+ if (skip_zero && !show_empty && !s->slabs)
+ return;
+
+ if (show_empty && s->slabs)
return;
store_size(size_str, slab_size(s));
@@ -303,48 +476,128 @@ void slabcache(struct slabinfo *s)
flags);
}
-void slab_numa(struct slabinfo *s)
+/*
+ * Analyze debug options. Return false if something is amiss.
+ */
+int debug_opt_scan(char *opt)
{
- int node;
+ if (!opt || !opt[0] || strcmp(opt, "-") == 0)
+ return 1;
+
+ if (strcasecmp(opt, "a") == 0) {
+ sanity = 1;
+ poison = 1;
+ redzone = 1;
+ tracking = 1;
+ return 1;
+ }
- if (!highest_node)
- fatal("No NUMA information available.\n");
+ for ( ; *opt; opt++)
+ switch (*opt) {
+ case 'F' : case 'f':
+ if (sanity)
+ return 0;
+ sanity = 1;
+ break;
+ case 'P' : case 'p':
+ if (poison)
+ return 0;
+ poison = 1;
+ break;
- if (skip_zero && !s->slabs)
- return;
+ case 'Z' : case 'z':
+ if (redzone)
+ return 0;
+ redzone = 1;
+ break;
- if (!line) {
- printf("\nSlab Node ");
- for(node = 0; node <= highest_node; node++)
- printf(" %4d", node);
- printf("\n----------------------");
- for(node = 0; node <= highest_node; node++)
- printf("-----");
- printf("\n");
- }
- printf("%-21s ", s->name);
- for(node = 0; node <= highest_node; node++) {
- char b[20];
+ case 'U' : case 'u':
+ if (tracking)
+ return 0;
+ tracking = 1;
+ break;
- store_size(b, s->numa[node]);
- printf(" %4s", b);
- }
- printf("\n");
- line++;
+ case 'T' : case 't':
+ if (tracing)
+ return 0;
+ tracing = 1;
+ break;
+ default:
+ return 0;
+ }
+ return 1;
}
-void show_tracking(struct slabinfo *s)
+int slab_empty(struct slabinfo *s)
{
- printf("\n%s: Calls to allocate a slab object\n", s->name);
- printf("---------------------------------------------------\n");
- if (read_obj("alloc_calls"))
- printf(buffer);
+ if (s->objects > 0)
+ return 0;
- printf("%s: Calls to free a slab object\n", s->name);
- printf("-----------------------------------------------\n");
- if (read_obj("free_calls"))
- printf(buffer);
+ /*
+ * We may still have slabs even if there are no objects. Shrinking will
+ * remove them.
+ */
+ if (s->slabs != 0)
+ set_obj(s, "shrink", 1);
+ return 1;
+}
+
+void slab_debug(struct slabinfo *s)
+{
+ if (sanity && !s->sanity_checks) {
+ set_obj(s, "sanity", 1);
+ }
+ if (!sanity && s->sanity_checks) {
+ if (slab_empty(s))
+ set_obj(s, "sanity", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+ }
+ if (redzone && !s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+ }
+ if (!redzone && s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+ }
+ if (poison && !s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+ }
+ if (!poison && s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+ }
+ if (tracking && !s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+ }
+ if (!tracking && s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+ }
+ if (tracing && !s->trace) {
+ if (slabs == 1)
+ set_obj(s, "trace", 1);
+ else
+ fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+ }
+ if (!tracing && s->trace)
+ set_obj(s, "trace", 1);
}
void totals(void)
@@ -673,7 +926,7 @@ void link_slabs(void)
for (a = aliasinfo; a < aliasinfo + aliases; a++) {
- for(s = slabinfo; s < slabinfo + slabs; s++)
+ for (s = slabinfo; s < slabinfo + slabs; s++)
if (strcmp(a->ref, s->name) == 0) {
a->slab = s;
s->refs++;
@@ -704,7 +957,7 @@ void alias(void)
continue;
}
}
- printf("\n%-20s <- %s", a->slab->name, a->name);
+ printf("\n%-12s <- %s", a->slab->name, a->name);
active = a->slab->name;
}
else
@@ -729,7 +982,12 @@ void rename_slabs(void)
a = find_one_alias(s);
- s->name = a->name;
+ if (a)
+ s->name = a->name;
+ else {
+ s->name = "*";
+ actual_slabs--;
+ }
}
}
@@ -748,11 +1006,14 @@ void read_slab_dir(void)
char *t;
int count;
+ if (chdir("/sys/slab"))
+ fatal("SYSFS support for SLUB not active\n");
+
dir = opendir(".");
while ((de = readdir(dir))) {
if (de->d_name[0] == '.' ||
- slab_mismatch(de->d_name))
- continue;
+ (de->d_name[0] != ':' && slab_mismatch(de->d_name)))
+ continue;
switch (de->d_type) {
case DT_LNK:
alias->name = strdup(de->d_name);
@@ -807,6 +1068,7 @@ void read_slab_dir(void)
}
closedir(dir);
slabs = slab - slabinfo;
+ actual_slabs = slabs;
aliases = alias - aliasinfo;
if (slabs > MAX_SLABS)
fatal("Too many slabs\n");
@@ -825,34 +1087,37 @@ void output_slabs(void)
if (show_numa)
- slab_numa(slab);
- else
- if (show_track)
+ slab_numa(slab, 0);
+ else if (show_track)
show_tracking(slab);
- else
- if (validate)
+ else if (validate)
slab_validate(slab);
- else
- if (shrink)
+ else if (shrink)
slab_shrink(slab);
- else {
- if (show_slab)
- slabcache(slab);
- }
+ else if (set_debug)
+ slab_debug(slab);
+ else if (show_ops)
+ ops(slab);
+ else if (show_slab)
+ slabcache(slab);
}
}
struct option opts[] = {
{ "aliases", 0, NULL, 'a' },
- { "slabs", 0, NULL, 'l' },
- { "numa", 0, NULL, 'n' },
- { "zero", 0, NULL, 'z' },
- { "help", 0, NULL, 'h' },
- { "validate", 0, NULL, 'v' },
+ { "debug", 2, NULL, 'd' },
+ { "empty", 0, NULL, 'e' },
{ "first-alias", 0, NULL, 'f' },
+ { "help", 0, NULL, 'h' },
+ { "inverted", 0, NULL, 'i'},
+ { "numa", 0, NULL, 'n' },
+ { "ops", 0, NULL, 'o' },
+ { "report", 0, NULL, 'r' },
{ "shrink", 0, NULL, 's' },
+ { "slabs", 0, NULL, 'l' },
{ "track", 0, NULL, 't'},
- { "inverted", 0, NULL, 'i'},
+ { "validate", 0, NULL, 'v' },
+ { "zero", 0, NULL, 'z' },
{ "1ref", 0, NULL, '1'},
{ NULL, 0, NULL, 0 }
};
@@ -864,10 +1129,9 @@ int main(int argc, char *argv[])
char *pattern_source;
page_size = getpagesize();
- if (chdir("/sys/slab"))
- fatal("This kernel does not have SLUB support.\n");
- while ((c = getopt_long(argc, argv, "afhil1npstvzTS", opts, NULL)) != -1)
+ while ((c = getopt_long(argc, argv, "ad::efhil1noprstvzTS",
+ opts, NULL)) != -1)
switch(c) {
case '1':
show_single_ref = 1;
@@ -875,6 +1139,14 @@ int main(int argc, char *argv[])
case 'a':
show_alias = 1;
break;
+ case 'd':
+ set_debug = 1;
+ if (!debug_opt_scan(optarg))
+ fatal("Invalid debug option '%s'\n", optarg);
+ break;
+ case 'e':
+ show_empty = 1;
+ break;
case 'f':
show_first_alias = 1;
break;
@@ -887,6 +1159,12 @@ int main(int argc, char *argv[])
case 'n':
show_numa = 1;
break;
+ case 'o':
+ show_ops = 1;
+ break;
+ case 'r':
+ show_report = 1;
+ break;
case 's':
shrink = 1;
break;
@@ -914,8 +1192,8 @@ int main(int argc, char *argv[])
}
- if (!show_slab && !show_alias && !show_track
- && !validate && !shrink)
+ if (!show_slab && !show_alias && !show_track && !show_report
+ && !validate && !shrink && !set_debug && !show_ops)
show_slab = 1;
if (argc > optind)
diff --git a/arch/avr32/Makefile b/arch/avr32/Makefile
index 6115fc1f0cf..dc6bc01f232 100644
--- a/arch/avr32/Makefile
+++ b/arch/avr32/Makefile
@@ -16,7 +16,7 @@ AFLAGS += -mrelax -mno-pic
CFLAGS_MODULE += -mno-relax
LDFLAGS_vmlinux += --relax
-cpuflags-$(CONFIG_CPU_AP7000) += -mcpu=ap7000
+cpuflags-$(CONFIG_CPU_AT32AP7000) += -mcpu=ap7000
CFLAGS += $(cpuflags-y)
AFLAGS += $(cpuflags-y)
diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c
index 4e4181ed1c6..13f98840261 100644
--- a/arch/avr32/kernel/process.c
+++ b/arch/avr32/kernel/process.c
@@ -330,13 +330,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp,
{
struct pt_regs *childregs;
- childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;
+ childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)task_stack_page(p))) - 1;
*childregs = *regs;
if (user_mode(regs))
childregs->sp = usp;
else
- childregs->sp = (unsigned long)p->thread_info + THREAD_SIZE;
+ childregs->sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs->r12 = 0; /* Set return value for child */
@@ -403,7 +403,7 @@ unsigned long get_wchan(struct task_struct *p)
if (!p || p == current || p->state == TASK_RUNNING)
return 0;
- stack_page = (unsigned long)p->thread_info;
+ stack_page = (unsigned long)task_stack_page(p);
BUG_ON(!stack_page);
/*
diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c
index 8ac74dddbbd..3c36c2d1614 100644
--- a/arch/avr32/kernel/ptrace.c
+++ b/arch/avr32/kernel/ptrace.c
@@ -24,7 +24,7 @@
static struct pt_regs *get_user_regs(struct task_struct *tsk)
{
- return (struct pt_regs *)((unsigned long) tsk->thread_info +
+ return (struct pt_regs *)((unsigned long)task_stack_page(tsk) +
THREAD_SIZE - sizeof(struct pt_regs));
}
diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S
index 7c279586fbb..07f6a6fa340 100644
--- a/arch/avr32/kernel/syscall_table.S
+++ b/arch/avr32/kernel/syscall_table.S
@@ -291,4 +291,5 @@ sys_call_table:
.long sys_shmget /* 275 */
.long sys_shmdt
.long sys_shmctl
+ .long sys_utimensat
.long sys_ni_syscall /* r8 is saturated at nr_syscalls */
diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c
index 4de9edf96ed..86d107511dd 100644
--- a/arch/avr32/kernel/traps.c
+++ b/arch/avr32/kernel/traps.c
@@ -123,7 +123,7 @@ asmlinkage void do_address_exception(unsigned long ecr, struct pt_regs *regs)
/* This way of handling undefined instructions is stolen from ARM */
static LIST_HEAD(undef_hook);
-static spinlock_t undef_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(undef_lock);
void register_undef_hook(struct undef_hook *hook)
{
diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c
index 7ad20cfb48a..e7f72c995a3 100644
--- a/arch/avr32/kernel/vmlinux.lds.c
+++ b/arch/avr32/kernel/vmlinux.lds.c
@@ -35,7 +35,7 @@ SECTIONS
_einittext = .;
. = ALIGN(4);
__tagtable_begin = .;
- *(.taglist)
+ *(.taglist.init)
__tagtable_end = .;
*(.init.data)
. = ALIGN(16);
diff --git a/arch/avr32/mach-at32ap/clock.c b/arch/avr32/mach-at32ap/clock.c
index 00c435452d7..0f8c89c9f83 100644
--- a/arch/avr32/mach-at32ap/clock.c
+++ b/arch/avr32/mach-at32ap/clock.c
@@ -18,7 +18,7 @@
#include "clock.h"
-static spinlock_t clk_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_SPINLOCK(clk_lock);
struct clk *clk_get(struct device *dev, const char *id)
{
diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c
index b68d669f823..099212d4567 100644
--- a/arch/avr32/mm/dma-coherent.c
+++ b/arch/avr32/mm/dma-coherent.c
@@ -112,16 +112,21 @@ void dma_free_coherent(struct device *dev, size_t size,
}
EXPORT_SYMBOL(dma_free_coherent);
-#if 0
void *dma_alloc_writecombine(struct device *dev, size_t size,
dma_addr_t *handle, gfp_t gfp)
{
struct page *page;
+ dma_addr_t phys;
page = __dma_alloc(dev, size, handle, gfp);
+ if (!page)
+ return NULL;
+
+ phys = page_to_phys(page);
+ *handle = phys;
/* Now, map the page into P3 with write-combining turned on */
- return __ioremap(page_to_phys(page), size, _PAGE_BUFFER);
+ return __ioremap(phys, size, _PAGE_BUFFER);
}
EXPORT_SYMBOL(dma_alloc_writecombine);
@@ -132,8 +137,7 @@ void dma_free_writecombine(struct device *dev, size_t size,
iounmap(cpu_addr);
- page = bus_to_page(handle);
+ page = phys_to_page(handle);
__dma_free(dev, size, page, handle);
}
EXPORT_SYMBOL(dma_free_writecombine);
-#endif
diff --git a/arch/blackfin/kernel/asm-offsets.c b/arch/blackfin/kernel/asm-offsets.c
index 41d9a9f8970..e455f450450 100644
--- a/arch/blackfin/kernel/asm-offsets.c
+++ b/arch/blackfin/kernel/asm-offsets.c
@@ -46,7 +46,7 @@ int main(void)
DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
- DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(TASK_MM, offsetof(struct task_struct, mm));
DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending));
diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c
index d7c8e514cb9..e718bb4a1ef 100644
--- a/arch/blackfin/kernel/ptrace.c
+++ b/arch/blackfin/kernel/ptrace.c
@@ -73,7 +73,7 @@
static inline struct pt_regs *get_user_regs(struct task_struct *task)
{
return (struct pt_regs *)
- ((unsigned long)task->thread_info +
+ ((unsigned long)task_stack_page(task) +
(THREAD_SIZE - sizeof(struct pt_regs)));
}
@@ -99,7 +99,7 @@ static inline long get_reg(struct task_struct *task, int regno)
unsigned char *reg_ptr;
struct pt_regs *regs =
- (struct pt_regs *)((unsigned long)task->thread_info +
+ (struct pt_regs *)((unsigned long)task_stack_page(task) +
(THREAD_SIZE - sizeof(struct pt_regs)));
reg_ptr = (char *)regs;
@@ -125,7 +125,7 @@ put_reg(struct task_struct *task, int regno, unsigned long data)
char * reg_ptr;
struct pt_regs *regs =
- (struct pt_regs *)((unsigned long)task->thread_info +
+ (struct pt_regs *)((unsigned long)task_stack_page(task) +
(THREAD_SIZE - sizeof(struct pt_regs)));
reg_ptr = (char *)regs;
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index eed694312a7..114738a4558 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -45,15 +45,15 @@ config TIME_LOW_RES
bool
default y
-config ARCH_HAS_ILOG2_U32
+config QUICKLIST
bool
default y
-config ARCH_HAS_ILOG2_U64
+config ARCH_HAS_ILOG2_U32
bool
default y
-config ARCH_USES_SLAB_PAGE_STRUCT
+config ARCH_HAS_ILOG2_U64
bool
default y
diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c
index 515a5cea546..9583a338e9d 100644
--- a/arch/frv/kernel/process.c
+++ b/arch/frv/kernel/process.c
@@ -25,12 +25,14 @@
#include <linux/elf.h>
#include <linux/reboot.h>
#include <linux/interrupt.h>
+#include <linux/pagemap.h>
#include <asm/asm-offsets.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
+#include <asm/tlb.h>
#include <asm/gdb-stub.h>
#include <asm/mb-regs.h>
@@ -88,6 +90,8 @@ void cpu_idle(void)
while (!need_resched()) {
irq_stat[cpu].idle_timestamp = jiffies;
+ check_pgt_cache();
+
if (!frv_dma_inprogress && idle)
idle();
}
diff --git a/arch/frv/mm/pgalloc.c b/arch/frv/mm/pgalloc.c
index 598a26ab8ad..7787c3cc52c 100644
--- a/arch/frv/mm/pgalloc.c
+++ b/arch/frv/mm/pgalloc.c
@@ -13,12 +13,12 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/highmem.h>
+#include <linux/quicklist.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
#include <asm/cacheflush.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((aligned(PAGE_SIZE)));
-struct kmem_cache *pgd_cache;
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
@@ -100,7 +100,7 @@ static inline void pgd_list_del(pgd_t *pgd)
set_page_private(next, (unsigned long) pprev);
}
-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+void pgd_ctor(void *pgd)
{
unsigned long flags;
@@ -120,7 +120,7 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
}
/* never called when PTRS_PER_PMD > 1 */
-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
+void pgd_dtor(void *pgd)
{
unsigned long flags; /* can be called from interrupt context */
@@ -133,7 +133,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
- pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
+ pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
if (!pgd)
return pgd;
@@ -143,15 +143,15 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
void pgd_free(pgd_t *pgd)
{
/* in the non-PAE case, clear_page_tables() clears user pgd entries */
- kmem_cache_free(pgd_cache, pgd);
+ quicklist_free(0, pgd_dtor, pgd);
}
void __init pgtable_cache_init(void)
{
- pgd_cache = kmem_cache_create("pgd",
- PTRS_PER_PGD * sizeof(pgd_t),
- PTRS_PER_PGD * sizeof(pgd_t),
- SLAB_PANIC,
- pgd_ctor,
- pgd_dtor);
}
+
+void check_pgt_cache(void)
+{
+ quicklist_trim(0, pgd_dtor, 25, 16);
+}
+
diff --git a/arch/h8300/kernel/asm-offsets.c b/arch/h8300/kernel/asm-offsets.c
index b78b82ad28a..fc30b4fd091 100644
--- a/arch/h8300/kernel/asm-offsets.c
+++ b/arch/h8300/kernel/asm-offsets.c
@@ -30,7 +30,7 @@ int main(void)
DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
- DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(TASK_MM, offsetof(struct task_struct, mm));
DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index 80b4c5d421b..e5be819492e 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -733,9 +733,11 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
cache_add_dev(sys_dev);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
cache_remove_dev(sys_dev);
break;
}
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 2f28540caae..7ba7c3abd3a 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -137,10 +137,12 @@ static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
mutex_lock(&therm_cpu_lock);
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
err = thermal_throttle_add_dev(sys_dev);
WARN_ON(err);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
thermal_throttle_remove_dev(sys_dev);
break;
}
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 6471a5a1320..200fb3f9ebf 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -77,8 +77,10 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
/* If we can run i686 user-space code, call us an i686 */
-#define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV)
- if ( c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686 )
+#define USER686 ((1 << X86_FEATURE_TSC)|\
+ (1 << X86_FEATURE_CX8)|\
+ (1 << X86_FEATURE_CMOV))
+ if (c->x86 == 5 && (c->x86_capability[0] & USER686) == USER686)
c->x86 = 6;
#ifdef CONFIG_SYSCTL
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c
index eeae0d99233..5c2faa10e9f 100644
--- a/arch/i386/kernel/cpuid.c
+++ b/arch/i386/kernel/cpuid.c
@@ -169,9 +169,11 @@ static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long ac
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
cpuid_device_create(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu));
break;
}
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index cbe7ec8dbb9..83f825f2e2d 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -567,7 +567,7 @@ static int cpu_request_microcode(int cpu)
return error;
}
-static int apply_microcode_on_cpu(int cpu)
+static int apply_microcode_check_cpu(int cpu)
{
struct cpuinfo_x86 *c = cpu_data + cpu;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -575,8 +575,9 @@ static int apply_microcode_on_cpu(int cpu)
unsigned int val[2];
int err = 0;
+ /* Check if the microcode is available */
if (!uci->mc)
- return -EINVAL;
+ return 0;
old = current->cpus_allowed;
set_cpus_allowed(current, cpumask_of_cpu(cpu));
@@ -614,7 +615,7 @@ static int apply_microcode_on_cpu(int cpu)
return err;
}
-static void microcode_init_cpu(int cpu)
+static void microcode_init_cpu(int cpu, int resume)
{
cpumask_t old;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -624,8 +625,7 @@ static void microcode_init_cpu(int cpu)
set_cpus_allowed(current, cpumask_of_cpu(cpu));
mutex_lock(&microcode_mutex);
collect_cpu_info(cpu);
- if (uci->valid && system_state == SYSTEM_RUNNING &&
- !suspend_cpu_hotplug)
+ if (uci->valid && system_state == SYSTEM_RUNNING && !resume)
cpu_request_microcode(cpu);
mutex_unlock(&microcode_mutex);
set_cpus_allowed(current, old);
@@ -702,7 +702,7 @@ static struct attribute_group mc_attr_group = {
.name = "microcode",
};
-static int mc_sysdev_add(struct sys_device *sys_dev)
+static int __mc_sysdev_add(struct sys_device *sys_dev, int resume)
{
int err, cpu = sys_dev->id;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
@@ -711,39 +711,31 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
return 0;
pr_debug("Microcode:CPU %d added\n", cpu);
- /* If suspend_cpu_hotplug is set, the system is resuming and we should
- * use the data from before the suspend.
- */
- if (suspend_cpu_hotplug) {
- err = apply_microcode_on_cpu(cpu);
- if (err)
- microcode_fini_cpu(cpu);
- }
- if (!uci->valid)
- memset(uci, 0, sizeof(*uci));
+ memset(uci, 0, sizeof(*uci));
err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
if (err)
return err;
- if (!uci->valid)
- microcode_init_cpu(cpu);
+ microcode_init_cpu(cpu, resume);
return 0;
}
+static int mc_sysdev_add(struct sys_device *sys_dev)
+{
+ return __mc_sysdev_add(sys_dev, 0);
+}
+
static int mc_sysdev_remove(struct sys_device *sys_dev)
{
int cpu = sys_dev->id;
if (!cpu_online(cpu))
return 0;
+
pr_debug("Microcode:CPU %d removed\n", cpu);
- /* If suspend_cpu_hotplug is set, the system is suspending and we should
- * keep the microcode in memory for the resume.
- */
- if (!suspend_cpu_hotplug)
- microcode_fini_cpu(cpu);
+ microcode_fini_cpu(cpu);
sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
return 0;
}
@@ -774,13 +766,34 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
+ case CPU_UP_CANCELED_FROZEN:
+ /* The CPU refused to come up during a system resume */
+ microcode_fini_cpu(cpu);
+ break;
case CPU_ONLINE:
case CPU_DOWN_FAILED:
mc_sysdev_add(sys_dev);
break;
+ case CPU_ONLINE_FROZEN:
+ /* System-wide resume is in progress, try to apply microcode */
+ if (apply_microcode_check_cpu(cpu)) {
+ /* The application of microcode failed */
+ microcode_fini_cpu(cpu);
+ __mc_sysdev_add(sys_dev, 1);
+ break;
+ }
+ case CPU_DOWN_FAILED_FROZEN:
+ if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group))
+ printk(KERN_ERR "Microcode: Failed to create the sysfs "
+ "group for CPU%d\n", cpu);
+ break;
case CPU_DOWN_PREPARE:
mc_sysdev_remove(sys_dev);
break;
+ case CPU_DOWN_PREPARE_FROZEN:
+ /* Suspend is in progress, only remove the interface */
+ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
+ break;
}
return NOTIFY_OK;
}
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c
index 8cd0a91ce10..0c1069b8d63 100644
--- a/arch/i386/kernel/msr.c
+++ b/arch/i386/kernel/msr.c
@@ -153,9 +153,11 @@ static int msr_class_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
msr_device_create(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
device_destroy(msr_class, MKDEV(MSR_MAJOR, cpu));
break;
}
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 4bec0cbf407..c05e7e861b2 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -305,7 +305,7 @@ void show_registers(struct pt_regs *regs)
regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
TASK_COMM_LEN, current->comm, current->pid,
- current_thread_info(), current, current->thread_info);
+ current_thread_info(), current, task_thread_info(current));
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
diff --git a/arch/i386/mach-generic/probe.c b/arch/i386/mach-generic/probe.c
index a7b3999bb37..74f3da63442 100644
--- a/arch/i386/mach-generic/probe.c
+++ b/arch/i386/mach-generic/probe.c
@@ -119,9 +119,7 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
return 0;
}
-#ifdef CONFIG_SMP
int hard_smp_processor_id(void)
{
return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID));
}
-#endif
diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c
index 8fe7e4593d5..9b77b39b71a 100644
--- a/arch/i386/mach-voyager/voyager_basic.c
+++ b/arch/i386/mach-voyager/voyager_basic.c
@@ -292,8 +292,8 @@ machine_emergency_restart(void)
void
mca_nmi_hook(void)
{
- __u8 dumpval __attribute__((unused)) = inb(0xf823);
- __u8 swnmi __attribute__((unused)) = inb(0xf813);
+ __u8 dumpval __maybe_unused = inb(0xf823);
+ __u8 swnmi __maybe_unused = inb(0xf813);
/* FIXME: assume dump switch pressed */
/* check to see if the dump switch was pressed */
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c
index 1cf11af96de..3de9f9ba2da 100644
--- a/arch/i386/pci/init.c
+++ b/arch/i386/pci/init.c
@@ -6,7 +6,7 @@
in the right sequence from here. */
static __init int pci_access_init(void)
{
- int type __attribute__((unused)) = 0;
+ int type __maybe_unused = 0;
#ifdef CONFIG_PCI_DIRECT
type = pci_direct_probe();
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index d3e9f33e8bd..6a49600cf33 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -236,9 +236,11 @@ static int __cpuinit err_inject_cpu_callback(struct notifier_block *nfb,
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
err_inject_add_dev(sys_dev);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
err_inject_remove_dev(sys_dev);
break;
}
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 1d7cc7e2ce3..f8ae709de0b 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1689,7 +1689,7 @@ format_mca_init_stack(void *mca_data, unsigned long offset,
ti->preempt_count = 1;
ti->task = p;
ti->cpu = cpu;
- p->thread_info = ti;
+ p->stack = ti;
p->state = TASK_UNINTERRUPTIBLE;
cpu_set(cpu, p->cpus_allowed);
INIT_LIST_HEAD(&p->tasks);
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
index a71df9ae039..85829e27785 100644
--- a/arch/ia64/kernel/palinfo.c
+++ b/arch/ia64/kernel/palinfo.c
@@ -975,9 +975,11 @@ static int palinfo_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
create_palinfo_proc_entries(hotcpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
remove_palinfo_proc_entries(hotcpu);
break;
}
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
index a51f1d0bfb7..89f6b138a62 100644
--- a/arch/ia64/kernel/salinfo.c
+++ b/arch/ia64/kernel/salinfo.c
@@ -582,6 +582,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
struct salinfo_data *data;
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
spin_lock_irqsave(&data_saved_lock, flags);
for (i = 0, data = salinfo_data;
i < ARRAY_SIZE(salinfo_data);
@@ -592,6 +593,7 @@ salinfo_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu
spin_unlock_irqrestore(&data_saved_lock, flags);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
spin_lock_irqsave(&data_saved_lock, flags);
for (i = 0, data = salinfo_data;
i < ARRAY_SIZE(salinfo_data);
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 687500ddb4b..94ae3c87d82 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -412,9 +412,11 @@ static int __cpuinit cache_cpu_callback(struct notifier_block *nfb,
sys_dev = get_cpu_sysdev(cpu);
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
cache_add_dev(sys_dev);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
cache_remove_dev(sys_dev);
break;
}
diff --git a/arch/m68knommu/kernel/asm-offsets.c b/arch/m68knommu/kernel/asm-offsets.c
index b988c7bdc6e..7cd183d346e 100644
--- a/arch/m68knommu/kernel/asm-offsets.c
+++ b/arch/m68knommu/kernel/asm-offsets.c
@@ -31,7 +31,7 @@ int main(void)
DEFINE(TASK_PTRACE, offsetof(struct task_struct, ptrace));
DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked));
DEFINE(TASK_THREAD, offsetof(struct task_struct, thread));
- DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(TASK_MM, offsetof(struct task_struct, mm));
DEFINE(TASK_ACTIVE_MM, offsetof(struct task_struct, active_mm));
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 761a779d5c4..3b27309d54b 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -82,7 +82,7 @@ void output_task_defines(void)
{
text("/* MIPS task_struct offsets. */");
offset("#define TASK_STATE ", struct task_struct, state);
- offset("#define TASK_THREAD_INFO ", struct task_struct, thread_info);
+ offset("#define TASK_THREAD_INFO ", struct task_struct, stack);
offset("#define TASK_FLAGS ", struct task_struct, flags);
offset("#define TASK_MM ", struct task_struct, mm);
offset("#define TASK_PID ", struct task_struct, pid);
diff --git a/arch/mips/kernel/smtc.c b/arch/mips/kernel/smtc.c
index 5dcfab6b288..b361edb83dc 100644
--- a/arch/mips/kernel/smtc.c
+++ b/arch/mips/kernel/smtc.c
@@ -560,7 +560,7 @@ void smtc_boot_secondary(int cpu, struct task_struct *idle)
write_tc_gpr_sp(__KSTK_TOS(idle));
/* global pointer */
- write_tc_gpr_gp((unsigned long)idle->thread_info);
+ write_tc_gpr_gp((unsigned long)task_thread_info(idle));
smtc_status |= SMTC_MTC_ACTIVE;
write_tc_c0_tchalt(0);
diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c
index 54fdb959149..d3b7917a87c 100644
--- a/arch/parisc/kernel/asm-offsets.c
+++ b/arch/parisc/kernel/asm-offsets.c
@@ -54,7 +54,7 @@
int main(void)
{
- DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(TASK_THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(TASK_STATE, offsetof(struct task_struct, state));
DEFINE(TASK_FLAGS, offsetof(struct task_struct, flags));
DEFINE(TASK_SIGPENDING, offsetof(struct task_struct, pending));
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8f48560b7ee..37bc35e69db 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -58,7 +58,7 @@ int main(void)
#ifdef CONFIG_PPC64
DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context));
#else
- DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(PTRACE, offsetof(struct task_struct, ptrace));
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index cae39d9dfe4..68991c2d4a1 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -342,10 +342,12 @@ static int __cpuinit sysfs_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
register_cpu_online(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
unregister_cpu_online(cpu);
break;
#endif
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index b3a592b25ab..de45aa82d97 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -252,12 +252,15 @@ static int __cpuinit cpu_numa_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
numa_setup_cpu(lcpu);
ret = NOTIFY_OK;
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
unmap_cpu_from_node(lcpu);
break;
ret = NOTIFY_OK;
diff --git a/arch/ppc/kernel/asm-offsets.c b/arch/ppc/kernel/asm-offsets.c
index c5850a27265..e8e94321b59 100644
--- a/arch/ppc/kernel/asm-offsets.c
+++ b/arch/ppc/kernel/asm-offsets.c
@@ -35,7 +35,7 @@ int
main(void)
{
DEFINE(THREAD, offsetof(struct task_struct, thread));
- DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info));
+ DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
DEFINE(MM, offsetof(struct task_struct, mm));
DEFINE(PTRACE, offsetof(struct task_struct, ptrace));
DEFINE(KSP, offsetof(struct thread_struct, ksp));
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index ee89b33145d..81a2b92ab0c 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -567,9 +567,11 @@ appldata_cpu_notify(struct notifier_block *self,
{
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
appldata_online_cpu((long) hcpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
appldata_offline_cpu((long) hcpu);
break;
default:
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ec514fe5ccd..1375f8a4469 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -15,7 +15,7 @@
int main(void)
{
- DEFINE(__THREAD_info, offsetof(struct task_struct, thread_info),);
+ DEFINE(__THREAD_info, offsetof(struct task_struct, stack),);
DEFINE(__THREAD_ksp, offsetof(struct task_struct, thread.ksp),);
DEFINE(__THREAD_per, offsetof(struct task_struct, thread.per_info),);
DEFINE(__THREAD_mm_segment,
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b7977027a28..09f028a3266 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -789,10 +789,12 @@ static int __cpuinit smp_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
if (sysdev_create_file(s, &attr_capability))
return NOTIFY_BAD;
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
sysdev_remove_file(s, &attr_capability);
break;
}
diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c
index 29d7cfd1c97..6773ed76e41 100644
--- a/arch/sparc/kernel/asm-offsets.c
+++ b/arch/sparc/kernel/asm-offsets.c
@@ -28,7 +28,7 @@ int foo(void)
DEFINE(AOFF_task_gid, offsetof(struct task_struct, gid));
DEFINE(AOFF_task_euid, offsetof(struct task_struct, euid));
DEFINE(AOFF_task_egid, offsetof(struct task_struct, egid));
- /* DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); */
+ /* DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); */
DEFINE(ASIZ_task_uid, sizeof(current->uid));
DEFINE(ASIZ_task_gid, sizeof(current->gid));
DEFINE(ASIZ_task_euid, sizeof(current->euid));
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index dc652f21029..d0fde36395b 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/kdebug.h>
+#include <asm/smp.h>
#include <asm/delay.h>
#include <asm/system.h>
#include <asm/ptrace.h>
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index 354cc6b7053..b9c0f307a8f 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -320,21 +320,7 @@ source "crypto/Kconfig"
source "lib/Kconfig"
-menu "SCSI support"
-depends on BROKEN
-
-config SCSI
- tristate "SCSI support"
-
-# This gives us free_dma, which scsi.c wants.
-config GENERIC_ISA_DMA
- bool
- depends on SCSI
- default y
-
-source "arch/um/Kconfig.scsi"
-
-endmenu
+source "drivers/scsi/Kconfig"
source "drivers/md/Kconfig"
diff --git a/arch/um/Kconfig.scsi b/arch/um/Kconfig.scsi
deleted file mode 100644
index c291c942b1a..00000000000
--- a/arch/um/Kconfig.scsi
+++ /dev/null
@@ -1,58 +0,0 @@
-comment "SCSI support type (disk, tape, CD-ROM)"
- depends on SCSI
-
-config BLK_DEV_SD
- tristate "SCSI disk support"
- depends on SCSI
-
-config SD_EXTRA_DEVS
- int "Maximum number of SCSI disks that can be loaded as modules"
- depends on BLK_DEV_SD
- default "40"
-
-config CHR_DEV_ST
- tristate "SCSI tape support"
- depends on SCSI
-
-config BLK_DEV_SR
- tristate "SCSI CD-ROM support"
- depends on SCSI
-
-config BLK_DEV_SR_VENDOR
- bool "Enable vendor-specific extensions (for SCSI CDROM)"
- depends on BLK_DEV_SR
-
-config SR_EXTRA_DEVS
- int "Maximum number of CDROM devices that can be loaded as modules"
- depends on BLK_DEV_SR
- default "2"
-
-config CHR_DEV_SG
- tristate "SCSI generic support"
- depends on SCSI
-
-comment "Some SCSI devices (e.g. CD jukebox) support multiple LUNs"
- depends on SCSI
-
-#if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
-config SCSI_DEBUG_QUEUES
- bool "Enable extra checks in new queueing code"
- depends on SCSI
-
-#fi
-config SCSI_MULTI_LUN
- bool "Probe all LUNs on each SCSI device"
- depends on SCSI
-
-config SCSI_CONSTANTS
- bool "Verbose SCSI error reporting (kernel size +=12K)"
- depends on SCSI
-
-config SCSI_LOGGING
- bool "SCSI logging facility"
- depends on SCSI
-
-config SCSI_DEBUG
- tristate "SCSI debugging host simulator (EXPERIMENTAL)"
- depends on SCSI
-
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index ef36facd8fe..a96ae1a0610 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -178,20 +178,23 @@ int start_uml_skas(void)
int external_pid_skas(struct task_struct *task)
{
-#warning Need to look up userspace_pid by cpu
+ /* FIXME: Need to look up userspace_pid by cpu */
return(userspace_pid[0]);
}
int thread_pid_skas(struct task_struct *task)
{
-#warning Need to look up userspace_pid by cpu
+ /* FIXME: Need to look up userspace_pid by cpu */
return(userspace_pid[0]);
}
void kill_off_processes_skas(void)
{
if(proc_mm)
-#warning need to loop over userspace_pids in kill_off_processes_skas
+ /*
+ * FIXME: need to loop over userspace_pids in
+ * kill_off_processes_skas
+ */
os_kill_ptraced_process(userspace_pid[0], 1);
else {
struct task_struct *p;
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 92a7b59120d..2d9d2ca3929 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -239,6 +239,7 @@ out:
return ok;
}
+#ifdef UML_CONFIG_MODE_TT
void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
{
int flags = 0, pages;
@@ -260,6 +261,7 @@ void init_new_thread_stack(void *sig_stack, void (*usr1_handler)(int))
"errno = %d\n", errno);
}
}
+#endif
void init_new_thread_signals(void)
{
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 8e490fff3d4..5c894632079 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -68,7 +68,7 @@ static inline long do_syscall_stub(struct mm_id * mm_idp, void **addr)
int err, pid = mm_idp->u.pid;
if(proc_mm)
-#warning Need to look up userspace_pid by cpu
+ /* FIXME: Need to look up userspace_pid by cpu */
pid = userspace_pid[0];
multi_count++;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index 5c088a55396..6a0e466d01e 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -586,7 +586,7 @@ void switch_mm_skas(struct mm_id *mm_idp)
{
int err;
-#warning need cpu pid in switch_mm_skas
+ /* FIXME: need cpu pid in switch_mm_skas */
if(proc_mm){
err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0,
mm_idp->u.mm_fd);
diff --git a/arch/v850/kernel/asm-offsets.c b/arch/v850/kernel/asm-offsets.c
index 24f29136907..cee5c3142d4 100644
--- a/arch/v850/kernel/asm-offsets.c
+++ b/arch/v850/kernel/asm-offsets.c
@@ -29,7 +29,7 @@ int main (void)
DEFINE (TASK_PTRACE, offsetof (struct task_struct, ptrace));
DEFINE (TASK_BLOCKED, offsetof (struct task_struct, blocked));
DEFINE (TASK_THREAD, offsetof (struct task_struct, thread));
- DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, thread_info));
+ DEFINE (TASK_THREAD_INFO, offsetof (struct task_struct, stack));
DEFINE (TASK_MM, offsetof (struct task_struct, mm));
DEFINE (TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm));
DEFINE (TASK_PID, offsetof (struct task_struct, pid));
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index 3bc30d2c13d..3eaceac3248 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -32,7 +32,7 @@ atomic_t irq_err_count;
*/
static inline void stack_overflow_check(struct pt_regs *regs)
{
- u64 curbase = (u64) current->thread_info;
+ u64 curbase = (u64)task_stack_page(current);
static unsigned long warned = -60*HZ;
if (regs->rsp >= curbase && regs->rsp <= curbase + THREAD_SIZE &&
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 442169640e4..a14375dd542 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -720,9 +720,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
mce_create_device(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
mce_remove_device(cpu);
break;
}
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index d0bd5d66e10..03356e64f9c 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -654,9 +654,11 @@ static int threshold_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
threshold_create_device(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
threshold_remove_device(cpu);
break;
default:
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index dc32cef9619..51d4c6fa88c 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -327,7 +327,7 @@ static int __cpuinit
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
{
long cpu = (long)arg;
- if (action == CPU_ONLINE)
+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
return NOTIFY_DONE;
}
diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c
index b256cfbef34..698079b3a33 100644
--- a/arch/xtensa/kernel/asm-offsets.c
+++ b/arch/xtensa/kernel/asm-offsets.c
@@ -70,7 +70,7 @@ int main(void)
DEFINE(TASK_ACTIVE_MM, offsetof (struct task_struct, active_mm));
DEFINE(TASK_PID, offsetof (struct task_struct, pid));
DEFINE(TASK_THREAD, offsetof (struct task_struct, thread));
- DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, thread_info));
+ DEFINE(TASK_THREAD_INFO, offsetof (struct task_struct, stack));
DEFINE(TASK_STRUCT_SIZE, sizeof (struct task_struct));
BLANK();
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 640aa839d63..109e91b91ff 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1306,7 +1306,7 @@ static void as_exit_queue(elevator_t *e)
struct as_data *ad = e->elevator_data;
del_timer_sync(&ad->antic_timer);
- kblockd_flush();
+ kblockd_flush_work(&ad->antic_work);
BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
diff --git a/block/genhd.c b/block/genhd.c
index b5664440896..93a2cf65459 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -213,6 +213,59 @@ struct gendisk *get_gendisk(dev_t dev, int *part)
return kobj ? to_disk(kobj) : NULL;
}
+/*
+ * print a full list of all partitions - intended for places where the root
+ * filesystem can't be mounted and thus to give the victim some idea of what
+ * went wrong
+ */
+void __init printk_all_partitions(void)
+{
+ int n;
+ struct gendisk *sgp;
+
+ mutex_lock(&block_subsys_lock);
+ /* For each block device... */
+ list_for_each_entry(sgp, &block_subsys.list, kobj.entry) {
+ char buf[BDEVNAME_SIZE];
+ /*
+ * Don't show empty devices or things that have been surpressed
+ */
+ if (get_capacity(sgp) == 0 ||
+ (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+ continue;
+
+ /*
+ * Note, unlike /proc/partitions, I am showing the numbers in
+ * hex - the same format as the root= option takes.
+ */
+ printk("%02x%02x %10llu %s",
+ sgp->major, sgp->first_minor,
+ (unsigned long long)get_capacity(sgp) >> 1,
+ disk_name(sgp, 0, buf));
+ if (sgp->driverfs_dev != NULL &&
+ sgp->driverfs_dev->driver != NULL)
+ printk(" driver: %s\n",
+ sgp->driverfs_dev->driver->name);
+ else
+ printk(" (driver?)\n");
+
+ /* now show the partitions */
+ for (n = 0; n < sgp->minors - 1; ++n) {
+ if (sgp->part[n] == NULL)
+ continue;
+ if (sgp->part[n]->nr_sects == 0)
+ continue;
+ printk(" %02x%02x %10llu %s\n",
+ sgp->major, n + 1 + sgp->first_minor,
+ (unsigned long long)sgp->part[n]->nr_sects >> 1,
+ disk_name(sgp, n + 1, buf));
+ } /* partition subloop */
+ } /* Block device loop */
+
+ mutex_unlock(&block_subsys_lock);
+ return;
+}
+
#ifdef CONFIG_PROC_FS
/* iterator */
static void *part_start(struct seq_file *part, loff_t *pos)
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index f294f1538f1..17e18897342 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -1712,7 +1712,6 @@ EXPORT_SYMBOL(blk_stop_queue);
void blk_sync_queue(struct request_queue *q)
{
del_timer_sync(&q->unplug_timer);
- kblockd_flush();
}
EXPORT_SYMBOL(blk_sync_queue);
@@ -3508,7 +3507,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action,
* If a CPU goes away, splice its entries to the current CPU
* and trigger a run of the softirq
*/
- if (action == CPU_DEAD) {
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
int cpu = (unsigned long) hcpu;
local_irq_disable();
@@ -3632,11 +3631,11 @@ int kblockd_schedule_work(struct work_struct *work)
EXPORT_SYMBOL(kblockd_schedule_work);
-void kblockd_flush(void)
+void kblockd_flush_work(struct work_struct *work)
{
- flush_workqueue(kblockd_workqueue);
+ cancel_work_sync(work);
}
-EXPORT_SYMBOL(kblockd_flush);
+EXPORT_SYMBOL(kblockd_flush_work);
int __init blk_dev_init(void)
{
diff --git a/drivers/acpi/sleep/main.c b/drivers/acpi/sleep/main.c
index f8c63410bcb..52b23471dd6 100644
--- a/drivers/acpi/sleep/main.c
+++ b/drivers/acpi/sleep/main.c
@@ -29,7 +29,6 @@ static u32 acpi_suspend_states[] = {
[PM_SUSPEND_ON] = ACPI_STATE_S0,
[PM_SUSPEND_STANDBY] = ACPI_STATE_S1,
[PM_SUSPEND_MEM] = ACPI_STATE_S3,
- [PM_SUSPEND_DISK] = ACPI_STATE_S4,
[PM_SUSPEND_MAX] = ACPI_STATE_S5
};
@@ -94,14 +93,6 @@ static int acpi_pm_enter(suspend_state_t pm_state)
do_suspend_lowlevel();
break;
- case PM_SUSPEND_DISK:
- if (acpi_pm_ops.pm_disk_mode == PM_DISK_PLATFORM)
- status = acpi_enter_sleep_state(acpi_state);
- break;
- case PM_SUSPEND_MAX:
- acpi_power_off();
- break;
-
default:
return -EINVAL;
}
@@ -157,12 +148,13 @@ int acpi_suspend(u32 acpi_state)
suspend_state_t states[] = {
[1] = PM_SUSPEND_STANDBY,
[3] = PM_SUSPEND_MEM,
- [4] = PM_SUSPEND_DISK,
[5] = PM_SUSPEND_MAX
};
if (acpi_state < 6 && states[acpi_state])
return pm_suspend(states[acpi_state]);
+ if (acpi_state == 4)
+ return hibernate();
return -EINVAL;
}
@@ -189,6 +181,49 @@ static struct pm_ops acpi_pm_ops = {
.finish = acpi_pm_finish,
};
+#ifdef CONFIG_SOFTWARE_SUSPEND
+static int acpi_hibernation_prepare(void)
+{
+ return acpi_sleep_prepare(ACPI_STATE_S4);
+}
+
+static int acpi_hibernation_enter(void)
+{
+ acpi_status status = AE_OK;
+ unsigned long flags = 0;
+
+ ACPI_FLUSH_CPU_CACHE();
+
+ local_irq_save(flags);
+ acpi_enable_wakeup_device(ACPI_STATE_S4);
+ /* This shouldn't return. If it returns, we have a problem */
+ status = acpi_enter_sleep_state(ACPI_STATE_S4);
+ local_irq_restore(flags);
+
+ return ACPI_SUCCESS(status) ? 0 : -EFAULT;
+}
+
+static void acpi_hibernation_finish(void)
+{
+ acpi_leave_sleep_state(ACPI_STATE_S4);
+ acpi_disable_wakeup_device(ACPI_STATE_S4);
+
+ /* reset firmware waking vector */
+ acpi_set_firmware_waking_vector((acpi_physical_address) 0);
+
+ if (init_8259A_after_S1) {
+ printk("Broken toshiba laptop -> kicking interrupts\n");
+ init_8259A(0);
+ }
+}
+
+static struct hibernation_ops acpi_hibernation_ops = {
+ .prepare = acpi_hibernation_prepare,
+ .enter = acpi_hibernation_enter,
+ .finish = acpi_hibernation_finish,
+};
+#endif /* CONFIG_SOFTWARE_SUSPEND */
+
/*
* Toshiba fails to preserve interrupts over S1, reinitialization
* of 8259 is needed after S1 resume.
@@ -227,14 +262,18 @@ int __init acpi_sleep_init(void)
sleep_states[i] = 1;
printk(" S%d", i);
}
- if (i == ACPI_STATE_S4) {
- if (sleep_states[i])
- acpi_pm_ops.pm_disk_mode = PM_DISK_PLATFORM;
- }
}
printk(")\n");
pm_set_ops(&acpi_pm_ops);
+
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ if (sleep_states[ACPI_STATE_S4])
+ hibernation_set_ops(&acpi_hibernation_ops);
+#else
+ sleep_states[ACPI_STATE_S4] = 0;
+#endif
+
return 0;
}
diff --git a/drivers/acpi/sleep/proc.c b/drivers/acpi/sleep/proc.c
index 5a76e5be61d..76b45f0b834 100644
--- a/drivers/acpi/sleep/proc.c
+++ b/drivers/acpi/sleep/proc.c
@@ -60,7 +60,7 @@ acpi_system_write_sleep(struct file *file,
state = simple_strtoul(str, NULL, 0);
#ifdef CONFIG_SOFTWARE_SUSPEND
if (state == 4) {
- error = pm_suspend(PM_SUSPEND_DISK);
+ error = hibernate();
goto Done;
}
#endif
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index a7950885d18..fef87dd70d1 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1316,7 +1316,7 @@ void ata_port_flush_task(struct ata_port *ap)
spin_unlock_irqrestore(ap->lock, flags);
DPRINTK("flush #1\n");
- flush_workqueue(ata_wq);
+ cancel_work_sync(&ap->port_task.work); /* akpm: seems unneeded */
/*
* At this point, if a task is running, it's guaranteed to see
@@ -1327,7 +1327,7 @@ void ata_port_flush_task(struct ata_port *ap)
if (ata_msg_ctl(ap))
ata_port_printk(ap, KERN_DEBUG, "%s: flush #2\n",
__FUNCTION__);
- flush_workqueue(ata_wq);
+ cancel_work_sync(&ap->port_task.work);
}
spin_lock_irqsave(ap->lock, flags);
@@ -6475,9 +6475,9 @@ void ata_port_detach(struct ata_port *ap)
/* Flush hotplug task. The sequence is similar to
* ata_port_flush_task().
*/
- flush_workqueue(ata_aux_wq);
+ cancel_work_sync(&ap->hotplug_task.work); /* akpm: why? */
cancel_delayed_work(&ap->hotplug_task);
- flush_workqueue(ata_aux_wq);
+ cancel_work_sync(&ap->hotplug_task.work);
skip_eh:
/* remove the associated SCSI host */
diff --git a/drivers/base/topology.c b/drivers/base/topology.c
index 067a9e8bc37..8d8cdfec652 100644
--- a/drivers/base/topology.c
+++ b/drivers/base/topology.c
@@ -126,10 +126,13 @@ static int __cpuinit topology_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
rc = topology_add_dev(cpu);
break;
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
topology_remove_dev(cpu);
break;
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index af6d7274a7c..18cdd8c7762 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -243,17 +243,13 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
transfer_result = lo_do_transfer(lo, WRITE, page, offset,
bvec->bv_page, bv_offs, size, IV);
if (unlikely(transfer_result)) {
- char *kaddr;
-
/*
* The transfer failed, but we still write the data to
* keep prepare/commit calls balanced.
*/
printk(KERN_ERR "loop: transfer error block %llu\n",
(unsigned long long)index);
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, size);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, size, KM_USER0);
}
flush_dcache_page(page);
ret = aops->commit_write(file, page, offset,
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 090796bef78..069ae39a9cd 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -366,20 +366,25 @@ static struct disk_attribute pid_attr = {
.show = pid_show,
};
-static void nbd_do_it(struct nbd_device *lo)
+static int nbd_do_it(struct nbd_device *lo)
{
struct request *req;
+ int ret;
BUG_ON(lo->magic != LO_MAGIC);
lo->pid = current->pid;
- sysfs_create_file(&lo->disk->kobj, &pid_attr.attr);
+ ret = sysfs_create_file(&lo->disk->kobj, &pid_attr.attr);
+ if (ret) {
+ printk(KERN_ERR "nbd: sysfs_create_file failed!");
+ return ret;
+ }
while ((req = nbd_read_stat(lo)) != NULL)
nbd_end_request(req);
sysfs_remove_file(&lo->disk->kobj, &pid_attr.attr);
- return;
+ return 0;
}
static void nbd_clear_que(struct nbd_device *lo)
@@ -569,7 +574,9 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
case NBD_DO_IT:
if (!lo->file)
return -EINVAL;
- nbd_do_it(lo);
+ error = nbd_do_it(lo);
+ if (error)
+ return error;
/* on return tidy up in case we have a signal */
/* Forcibly shutdown the socket causing all listeners
* to error
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 5f3acd8e64b..7cda04b3353 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -91,3 +91,17 @@ config HW_RANDOM_OMAP
module will be called omap-rng.
If unsure, say Y.
+
+config HW_RANDOM_PASEMI
+ tristate "PA Semi HW Random Number Generator support"
+ depends on HW_RANDOM && PPC_PASEMI
+ default HW_RANDOM
+ ---help---
+ This driver provides kernel-side support for the Random Number
+ Generator hardware found on PA6T-1682M processor.
+
+ To compile this driver as a module, choose M here: the
+ module will be called pasemi-rng.
+
+ If unsure, say Y.
+
diff --git a/drivers/char/hw_random/Makefile b/drivers/char/hw_random/Makefile
index c41fa19454e..c8b7300e2fb 100644
--- a/drivers/char/hw_random/Makefile
+++ b/drivers/char/hw_random/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_HW_RANDOM_GEODE) += geode-rng.o
obj-$(CONFIG_HW_RANDOM_VIA) += via-rng.o
obj-$(CONFIG_HW_RANDOM_IXP4XX) += ixp4xx-rng.o
obj-$(CONFIG_HW_RANDOM_OMAP) += omap-rng.o
+obj-$(CONFIG_HW_RANDOM_PASEMI) += pasemi-rng.o
diff --git a/drivers/char/hw_random/pasemi-rng.c b/drivers/char/hw_random/pasemi-rng.c
new file mode 100644
index 00000000000..fa6040b6c8f
--- /dev/null
+++ b/drivers/char/hw_random/pasemi-rng.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2006-2007 PA Semi, Inc
+ *
+ * Maintained by: Olof Johansson <olof@lixom.net>
+ *
+ * Driver for the PWRficient onchip rng
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/hw_random.h>
+#include <asm/of_platform.h>
+#include <asm/io.h>
+
+#define SDCRNG_CTL_REG 0x00
+#define SDCRNG_CTL_FVLD_M 0x0000f000
+#define SDCRNG_CTL_FVLD_S 12
+#define SDCRNG_CTL_KSZ 0x00000800
+#define SDCRNG_CTL_RSRC_CRG 0x00000010
+#define SDCRNG_CTL_RSRC_RRG 0x00000000
+#define SDCRNG_CTL_CE 0x00000004
+#define SDCRNG_CTL_RE 0x00000002
+#define SDCRNG_CTL_DR 0x00000001
+#define SDCRNG_CTL_SELECT_RRG_RNG (SDCRNG_CTL_RE | SDCRNG_CTL_RSRC_RRG)
+#define SDCRNG_CTL_SELECT_CRG_RNG (SDCRNG_CTL_CE | SDCRNG_CTL_RSRC_CRG)
+#define SDCRNG_VAL_REG 0x20
+
+#define MODULE_NAME "pasemi_rng"
+
+static int pasemi_rng_data_present(struct hwrng *rng)
+{
+ void __iomem *rng_regs = (void __iomem *)rng->priv;
+
+ return (in_le32(rng_regs + SDCRNG_CTL_REG)
+ & SDCRNG_CTL_FVLD_M) ? 1 : 0;
+}
+
+static int pasemi_rng_data_read(struct hwrng *rng, u32 *data)
+{
+ void __iomem *rng_regs = (void __iomem *)rng->priv;
+ *data = in_le32(rng_regs + SDCRNG_VAL_REG);
+ return 4;
+}
+
+static int pasemi_rng_init(struct hwrng *rng)
+{
+ void __iomem *rng_regs = (void __iomem *)rng->priv;
+ u32 ctl;
+
+ ctl = SDCRNG_CTL_DR | SDCRNG_CTL_SELECT_RRG_RNG | SDCRNG_CTL_KSZ;
+ out_le32(rng_regs + SDCRNG_CTL_REG, ctl);
+ out_le32(rng_regs + SDCRNG_CTL_REG, ctl & ~SDCRNG_CTL_DR);
+
+ return 0;
+}
+
+static void pasemi_rng_cleanup(struct hwrng *rng)
+{
+ void __iomem *rng_regs = (void __iomem *)rng->priv;
+ u32 ctl;
+
+ ctl = SDCRNG_CTL_RE | SDCRNG_CTL_CE;
+ out_le32(rng_regs + SDCRNG_CTL_REG,
+ in_le32(rng_regs + SDCRNG_CTL_REG) & ~ctl);
+}
+
+static struct hwrng pasemi_rng = {
+ .name = MODULE_NAME,
+ .init = pasemi_rng_init,
+ .cleanup = pasemi_rng_cleanup,
+ .data_present = pasemi_rng_data_present,
+ .data_read = pasemi_rng_data_read,
+};
+
+static int __devinit rng_probe(struct of_device *ofdev,
+ const struct of_device_id *match)
+{
+ void __iomem *rng_regs;
+ struct device_node *rng_np = ofdev->node;
+ struct resource res;
+ int err = 0;
+
+ err = of_address_to_resource(rng_np, 0, &res);
+ if (err)
+ return -ENODEV;
+
+ rng_regs = ioremap(res.start, 0x100);
+
+ if (!rng_regs)
+ return -ENOMEM;
+
+ pasemi_rng.priv = (unsigned long)rng_regs;
+
+ printk(KERN_INFO "Registering PA Semi RNG\n");
+
+ err = hwrng_register(&pasemi_rng);
+
+ if (err)
+ iounmap(rng_regs);
+
+ return err;
+}
+
+static int __devexit rng_remove(struct of_device *dev)
+{
+ void __iomem *rng_regs = (void __iomem *)pasemi_rng.priv;
+
+ hwrng_unregister(&pasemi_rng);
+ iounmap(rng_regs);
+
+ return 0;
+}
+
+static struct of_device_id rng_match[] = {
+ {
+ .compatible = "1682m-rng",
+ },
+ {},
+};
+
+static struct of_platform_driver rng_driver = {
+ .name = "pasemi-rng",
+ .match_table = rng_match,
+ .probe = rng_probe,
+ .remove = rng_remove,
+};
+
+static int __init rng_init(void)
+{
+ return of_register_platform_driver(&rng_driver);
+}
+module_init(rng_init);
+
+static void __exit rng_exit(void)
+{
+ of_unregister_platform_driver(&rng_driver);
+}
+module_exit(rng_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Egor Martovetsky <egor@pasemi.com>");
+MODULE_DESCRIPTION("H/W RNG driver for PA Semi processor");
diff --git a/drivers/char/pcmcia/Kconfig b/drivers/char/pcmcia/Kconfig
index 27c1179ee52..f25facd97bb 100644
--- a/drivers/char/pcmcia/Kconfig
+++ b/drivers/char/pcmcia/Kconfig
@@ -21,6 +21,7 @@ config SYNCLINK_CS
config CARDMAN_4000
tristate "Omnikey Cardman 4000 support"
depends on PCMCIA
+ select BITREVERSE
help
Enable support for the Omnikey Cardman 4000 PCMCIA Smartcard
reader.
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index 4ea587983ae..fee58e03dbe 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -31,6 +31,7 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/delay.h>
+#include <linux/bitrev.h>
#include <asm/uaccess.h>
#include <asm/io.h>
@@ -194,41 +195,17 @@ static inline unsigned char xinb(unsigned short port)
}
#endif
-#define b_0000 15
-#define b_0001 14
-#define b_0010 13
-#define b_0011 12
-#define b_0100 11
-#define b_0101 10
-#define b_0110 9
-#define b_0111 8
-#define b_1000 7
-#define b_1001 6
-#define b_1010 5
-#define b_1011 4
-#define b_1100 3
-#define b_1101 2
-#define b_1110 1
-#define b_1111 0
-
-static unsigned char irtab[16] = {
- b_0000, b_1000, b_0100, b_1100,
- b_0010, b_1010, b_0110, b_1110,
- b_0001, b_1001, b_0101, b_1101,
- b_0011, b_1011, b_0111, b_1111
-};
+static inline unsigned char invert_revert(unsigned char ch)
+{
+ return bitrev8(~ch);
+}
static void str_invert_revert(unsigned char *b, int len)
{
int i;
for (i = 0; i < len; i++)
- b[i] = (irtab[b[i] & 0x0f] << 4) | irtab[b[i] >> 4];
-}
-
-static unsigned char invert_revert(unsigned char ch)
-{
- return (irtab[ch & 0x0f] << 4) | irtab[ch >> 4];
+ b[i] = invert_revert(b[i]);
}
#define ATRLENCK(dev,pos) \
@@ -1881,8 +1858,11 @@ static int cm4000_probe(struct pcmcia_device *link)
init_waitqueue_head(&dev->readq);
ret = cm4000_config(link, i);
- if (ret)
+ if (ret) {
+ dev_table[i] = NULL;
+ kfree(dev);
return ret;
+ }
class_device_create(cmm_class, NULL, MKDEV(major, i), NULL,
"cmm%d", i);
@@ -1907,7 +1887,7 @@ static void cm4000_detach(struct pcmcia_device *link)
cm4000_release(link);
dev_table[devno] = NULL;
- kfree(dev);
+ kfree(dev);
class_device_destroy(cmm_class, MKDEV(major, devno));
@@ -1956,12 +1936,14 @@ static int __init cmm_init(void)
if (major < 0) {
printk(KERN_WARNING MODULE_NAME
": could not get major number\n");
+ class_destroy(cmm_class);
return major;
}
rc = pcmcia_register_driver(&cm4000_driver);
if (rc < 0) {
unregister_chrdev(major, DEVICE_NAME);
+ class_destroy(cmm_class);
return rc;
}
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index f2e4ec4fd40..af88181a17f 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -636,8 +636,11 @@ static int reader_probe(struct pcmcia_device *link)
setup_timer(&dev->poll_timer, cm4040_do_poll, 0);
ret = reader_config(link, i);
- if (ret)
+ if (ret) {
+ dev_table[i] = NULL;
+ kfree(dev);
return ret;
+ }
class_device_create(cmx_class, NULL, MKDEV(major, i), NULL,
"cmx%d", i);
@@ -708,12 +711,14 @@ static int __init cm4040_init(void)
if (major < 0) {
printk(KERN_WARNING MODULE_NAME
": could not get major number\n");
+ class_destroy(cmx_class);
return major;
}
rc = pcmcia_register_driver(&reader_driver);
if (rc < 0) {
unregister_chrdev(major, DEVICE_NAME);
+ class_destroy(cmx_class);
return rc;
}
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index f6ac1d316ea..fc662e4ce58 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -934,13 +934,6 @@ restart:
return -EINVAL;
/*
- * No more input please, we are switching. The new ldisc
- * will update this value in the ldisc open function
- */
-
- tty->receive_room = 0;
-
- /*
* Problem: What do we do if this blocks ?
*/
@@ -951,6 +944,13 @@ restart:
return 0;
}
+ /*
+ * No more input please, we are switching. The new ldisc
+ * will update this value in the ldisc open function
+ */
+
+ tty->receive_room = 0;
+
o_ldisc = tty->ldisc;
o_tty = tty->link;
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 893dbaf386f..eb37fba9b7e 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1685,9 +1685,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
if (sys_dev) {
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
cpufreq_add_dev(sys_dev);
break;
case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
if (unlikely(lock_policy_rwsem_write(cpu)))
BUG();
@@ -1699,6 +1701,7 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
__cpufreq_remove_dev(sys_dev);
break;
case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
cpufreq_add_dev(sys_dev);
break;
}
diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c
index d1c7cac9316..d2f0cbd8b8f 100644
--- a/drivers/cpufreq/cpufreq_stats.c
+++ b/drivers/cpufreq/cpufreq_stats.c
@@ -313,9 +313,11 @@ static int cpufreq_stat_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
cpufreq_update_policy(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
cpufreq_stats_free_table(cpu);
break;
}
diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c
index 03b1f650d1c..75e3911810a 100644
--- a/drivers/hwmon/coretemp.c
+++ b/drivers/hwmon/coretemp.c
@@ -309,9 +309,11 @@ static int coretemp_cpu_callback(struct notifier_block *nfb,
switch (action) {
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
coretemp_device_add(cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
coretemp_device_remove(cpu);
break;
}
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 7ed92dc3d83..3c3f2ebf3fc 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -354,7 +354,7 @@ static void tps65010_interrupt(struct tps65010 *tps)
* also needs to get error handling and probably
* an #ifdef CONFIG_SOFTWARE_SUSPEND
*/
- pm_suspend(PM_SUSPEND_DISK);
+ hibernate();
#endif
poll = 1;
}
diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
index f284be1c916..82dda2faf4d 100644
--- a/drivers/infiniband/hw/ehca/ehca_irq.c
+++ b/drivers/infiniband/hw/ehca/ehca_irq.c
@@ -745,6 +745,7 @@ static int comp_pool_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_PREPARE)", cpu);
if(!create_comp_task(pool, cpu)) {
ehca_gen_err("Can't create comp_task for cpu: %x", cpu);
@@ -752,24 +753,29 @@ static int comp_pool_callback(struct notifier_block *nfb,
}
break;
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_CANCELED)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, any_online_cpu(cpu_online_map));
destroy_comp_task(pool, cpu);
break;
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_ONLINE)", cpu);
cct = per_cpu_ptr(pool->cpu_comp_tasks, cpu);
kthread_bind(cct->task, cpu);
wake_up_process(cct->task);
break;
case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_PREPARE)", cpu);
break;
case CPU_DOWN_FAILED:
+ case CPU_DOWN_FAILED_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DOWN_FAILED)", cpu);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
ehca_gen_dbg("CPU: %x (CPU_DEAD)", cpu);
destroy_comp_task(pool, cpu);
take_over_work(pool, cpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c8b8cfa332b..0d892600ff0 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2889,7 +2889,9 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
switch (val) {
case CPU_DOWN_PREPARE:
+ case CPU_DOWN_PREPARE_FROZEN:
case CPU_UP_CANCELED:
+ case CPU_UP_CANCELED_FROZEN:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
cpu);
decache_vcpus_on_cpu(cpu);
@@ -2897,6 +2899,7 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val,
NULL, 0, 1);
break;
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
cpu);
smp_call_function_single(cpu, kvm_arch_ops->hardware_enable,
diff --git a/drivers/mca/mca-bus.c b/drivers/mca/mca-bus.c
index da862e4632d..67b8e9453b1 100644
--- a/drivers/mca/mca-bus.c
+++ b/drivers/mca/mca-bus.c
@@ -47,19 +47,25 @@ static int mca_bus_match (struct device *dev, struct device_driver *drv)
{
struct mca_device *mca_dev = to_mca_device (dev);
struct mca_driver *mca_drv = to_mca_driver (drv);
- const short *mca_ids = mca_drv->id_table;
- int i;
-
- if (!mca_ids)
- return 0;
-
- for(i = 0; mca_ids[i]; i++) {
- if (mca_ids[i] == mca_dev->pos_id) {
- mca_dev->index = i;
- return 1;
+ const unsigned short *mca_ids = mca_drv->id_table;
+ int i = 0;
+
+ if (mca_ids) {
+ for(i = 0; mca_ids[i]; i++) {
+ if (mca_ids[i] == mca_dev->pos_id) {
+ mca_dev->index = i;
+ return 1;
+ }
}
}
-
+ /* If the integrated id is present, treat it as though it were an
+ * additional id in the id_table (it can't be because by definition,
+ * integrated id's overflow a short */
+ if (mca_drv->integrated_id && mca_dev->pos_id ==
+ mca_drv->integrated_id) {
+ mca_dev->index = i;
+ return 1;
+ }
return 0;
}
diff --git a/drivers/mca/mca-driver.c b/drivers/mca/mca-driver.c
index 2223466b3d8..32cd39bcc71 100644
--- a/drivers/mca/mca-driver.c
+++ b/drivers/mca/mca-driver.c
@@ -36,12 +36,25 @@ int mca_register_driver(struct mca_driver *mca_drv)
mca_drv->driver.bus = &mca_bus_type;
if ((r = driver_register(&mca_drv->driver)) < 0)
return r;
+ mca_drv->integrated_id = 0;
}
return 0;
}
EXPORT_SYMBOL(mca_register_driver);
+int mca_register_driver_integrated(struct mca_driver *mca_driver,
+ int integrated_id)
+{
+ int r = mca_register_driver(mca_driver);
+
+ if (!r)
+ mca_driver->integrated_id = integrated_id;
+
+ return r;
+}
+EXPORT_SYMBOL(mca_register_driver_integrated);
+
void mca_unregister_driver(struct mca_driver *mca_drv)
{
if (MCA_bus)
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 4540ade6b6b..7df934d6913 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -262,6 +262,15 @@ config DM_MULTIPATH_EMC
---help---
Multipath support for EMC CX/AX series hardware.
+config DM_DELAY
+ tristate "I/O delaying target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+ ---help---
+ A target that delays reads and/or writes and can send
+ them to different devices. Useful for testing.
+
+ If unsure, say N.
+
endmenu
endif
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 34957a68d92..38754084eac 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -31,6 +31,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o
obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o
obj-$(CONFIG_DM_CRYPT) += dm-crypt.o
+obj-$(CONFIG_DM_DELAY) += dm-delay.o
obj-$(CONFIG_DM_MULTIPATH) += dm-multipath.o dm-round-robin.o
obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o
obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index da4349649f7..c6be88826fa 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -8,17 +8,43 @@
#define DM_BIO_LIST_H
#include <linux/bio.h>
+#include <linux/prefetch.h>
struct bio_list {
struct bio *head;
struct bio *tail;
};
+static inline int bio_list_empty(const struct bio_list *bl)
+{
+ return bl->head == NULL;
+}
+
+#define BIO_LIST_INIT { .head = NULL, .tail = NULL }
+
+#define BIO_LIST(bl) \
+ struct bio_list bl = BIO_LIST_INIT
+
static inline void bio_list_init(struct bio_list *bl)
{
bl->head = bl->tail = NULL;
}
+#define bio_list_for_each(bio, bl) \
+ for (bio = (bl)->head; bio && ({ prefetch(bio->bi_next); 1; }); \
+ bio = bio->bi_next)
+
+static inline unsigned bio_list_size(const struct bio_list *bl)
+{
+ unsigned sz = 0;
+ struct bio *bio;
+
+ bio_list_for_each(bio, bl)
+ sz++;
+
+ return sz;
+}
+
static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
{
bio->bi_next = NULL;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d8121234c34..7b0fcfc9eaa 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -33,7 +33,6 @@
struct crypt_io {
struct dm_target *target;
struct bio *base_bio;
- struct bio *first_clone;
struct work_struct work;
atomic_t pending;
int error;
@@ -107,6 +106,8 @@ struct crypt_config {
static struct kmem_cache *_crypt_io_pool;
+static void clone_init(struct crypt_io *, struct bio *);
+
/*
* Different IV generation algorithms:
*
@@ -120,6 +121,9 @@ static struct kmem_cache *_crypt_io_pool;
* benbi: the 64-bit "big-endian 'narrow block'-count", starting at 1
* (needed for LRW-32-AES and possible other narrow block modes)
*
+ * null: the initial vector is always zero. Provides compatibility with
+ * obsolete loop_fish2 devices. Do not use for new devices.
+ *
* plumb: unimplemented, see:
* http://article.gmane.org/gmane.linux.kernel.device-mapper.dm-crypt/454
*/
@@ -256,6 +260,13 @@ static int crypt_iv_benbi_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
return 0;
}
+static int crypt_iv_null_gen(struct crypt_config *cc, u8 *iv, sector_t sector)
+{
+ memset(iv, 0, cc->iv_size);
+
+ return 0;
+}
+
static struct crypt_iv_operations crypt_iv_plain_ops = {
.generator = crypt_iv_plain_gen
};
@@ -272,6 +283,10 @@ static struct crypt_iv_operations crypt_iv_benbi_ops = {
.generator = crypt_iv_benbi_gen
};
+static struct crypt_iv_operations crypt_iv_null_ops = {
+ .generator = crypt_iv_null_gen
+};
+
static int
crypt_convert_scatterlist(struct crypt_config *cc, struct scatterlist *out,
struct scatterlist *in, unsigned int length,
@@ -378,36 +393,21 @@ static int crypt_convert(struct crypt_config *cc,
* This should never violate the device limitations
* May return a smaller bio when running out of pages
*/
-static struct bio *
-crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
- struct bio *base_bio, unsigned int *bio_vec_idx)
+static struct bio *crypt_alloc_buffer(struct crypt_io *io, unsigned int size)
{
+ struct crypt_config *cc = io->target->private;
struct bio *clone;
unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM;
unsigned int i;
- if (base_bio) {
- clone = bio_alloc_bioset(GFP_NOIO, base_bio->bi_max_vecs, cc->bs);
- __bio_clone(clone, base_bio);
- } else
- clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
-
+ clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs);
if (!clone)
return NULL;
- clone->bi_destructor = dm_crypt_bio_destructor;
-
- /* if the last bio was not complete, continue where that one ended */
- clone->bi_idx = *bio_vec_idx;
- clone->bi_vcnt = *bio_vec_idx;
- clone->bi_size = 0;
- clone->bi_flags &= ~(1 << BIO_SEG_VALID);
-
- /* clone->bi_idx pages have already been allocated */
- size -= clone->bi_idx * PAGE_SIZE;
+ clone_init(io, clone);
- for (i = clone->bi_idx; i < nr_iovecs; i++) {
+ for (i = 0; i < nr_iovecs; i++) {
struct bio_vec *bv = bio_iovec_idx(clone, i);
bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask);
@@ -419,7 +419,7 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
* return a partially allocated bio, the caller will then try
* to allocate additional bios while submitting this partial bio
*/
- if ((i - clone->bi_idx) == (MIN_BIO_PAGES - 1))
+ if (i == (MIN_BIO_PAGES - 1))
gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT;
bv->bv_offset = 0;
@@ -438,12 +438,6 @@ crypt_alloc_buffer(struct crypt_config *cc, unsigned int size,
return NULL;
}
- /*
- * Remember the last bio_vec allocated to be able
- * to correctly continue after the splitting.
- */
- *bio_vec_idx = clone->bi_vcnt;
-
return clone;
}
@@ -495,9 +489,6 @@ static void dec_pending(struct crypt_io *io, int error)
if (!atomic_dec_and_test(&io->pending))
return;
- if (io->first_clone)
- bio_put(io->first_clone);
-
bio_endio(io->base_bio, io->base_bio->bi_size, io->error);
mempool_free(io, cc->io_pool);
@@ -562,6 +553,7 @@ static void clone_init(struct crypt_io *io, struct bio *clone)
clone->bi_end_io = crypt_endio;
clone->bi_bdev = cc->dev->bdev;
clone->bi_rw = io->base_bio->bi_rw;
+ clone->bi_destructor = dm_crypt_bio_destructor;
}
static void process_read(struct crypt_io *io)
@@ -585,7 +577,6 @@ static void process_read(struct crypt_io *io)
}
clone_init(io, clone);
- clone->bi_destructor = dm_crypt_bio_destructor;
clone->bi_idx = 0;
clone->bi_vcnt = bio_segments(base_bio);
clone->bi_size = base_bio->bi_size;
@@ -604,7 +595,6 @@ static void process_write(struct crypt_io *io)
struct convert_context ctx;
unsigned remaining = base_bio->bi_size;
sector_t sector = base_bio->bi_sector - io->target->begin;
- unsigned bvec_idx = 0;
atomic_inc(&io->pending);
@@ -615,14 +605,14 @@ static void process_write(struct crypt_io *io)
* so repeat the whole process until all the data can be handled.
*/
while (remaining) {
- clone = crypt_alloc_buffer(cc, base_bio->bi_size,
- io->first_clone, &bvec_idx);
+ clone = crypt_alloc_buffer(io, remaining);
if (unlikely(!clone)) {
dec_pending(io, -ENOMEM);
return;
}
ctx.bio_out = clone;
+ ctx.idx_out = 0;
if (unlikely(crypt_convert(cc, &ctx) < 0)) {
crypt_free_buffer_pages(cc, clone, clone->bi_size);
@@ -631,31 +621,26 @@ static void process_write(struct crypt_io *io)
return;
}
- clone_init(io, clone);
- clone->bi_sector = cc->start + sector;
-
- if (!io->first_clone) {
- /*
- * hold a reference to the first clone, because it
- * holds the bio_vec array and that can't be freed
- * before all other clones are released
- */
- bio_get(clone);
- io->first_clone = clone;
- }
+ /* crypt_convert should have filled the clone bio */
+ BUG_ON(ctx.idx_out < clone->bi_vcnt);
+ clone->bi_sector = cc->start + sector;
remaining -= clone->bi_size;
sector += bio_sectors(clone);
- /* prevent bio_put of first_clone */
+ /* Grab another reference to the io struct
+ * before we kick off the request */
if (remaining)
atomic_inc(&io->pending);
generic_make_request(clone);
+ /* Do not reference clone after this - it
+ * may be gone already. */
+
/* out of memory -> run queues */
if (remaining)
- congestion_wait(bio_data_dir(clone), HZ/100);
+ congestion_wait(WRITE, HZ/100);
}
}
@@ -832,6 +817,8 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
cc->iv_gen_ops = &crypt_iv_essiv_ops;
else if (strcmp(ivmode, "benbi") == 0)
cc->iv_gen_ops = &crypt_iv_benbi_ops;
+ else if (strcmp(ivmode, "null") == 0)
+ cc->iv_gen_ops = &crypt_iv_null_ops;
else {
ti->error = "Invalid IV mode";
goto bad2;
@@ -954,10 +941,12 @@ static int crypt_map(struct dm_target *ti, struct bio *bio,
struct crypt_config *cc = ti->private;
struct crypt_io *io;
+ if (bio_barrier(bio))
+ return -EOPNOTSUPP;
+
io = mempool_alloc(cc->io_pool, GFP_NOIO);
io->target = ti;
io->base_bio = bio;
- io->first_clone = NULL;
io->error = io->post_process = 0;
atomic_set(&io->pending, 0);
kcryptd_queue_io(io);
@@ -1057,7 +1046,7 @@ error:
static struct target_type crypt_target = {
.name = "crypt",
- .version= {1, 3, 0},
+ .version= {1, 5, 0},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
new file mode 100644
index 00000000000..52c7cf9e580
--- /dev/null
+++ b/drivers/md/dm-delay.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright (C) 2005-2007 Red Hat GmbH
+ *
+ * A target that delays reads and/or writes and can send
+ * them to different devices.
+ *
+ * This file is released under the GPL.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/blkdev.h>
+#include <linux/bio.h>
+#include <linux/slab.h>
+
+#include "dm.h"
+#include "dm-bio-list.h"
+
+#define DM_MSG_PREFIX "delay"
+
+struct delay_c {
+ struct timer_list delay_timer;
+ struct semaphore timer_lock;
+ struct work_struct flush_expired_bios;
+ struct list_head delayed_bios;
+ atomic_t may_delay;
+ mempool_t *delayed_pool;
+
+ struct dm_dev *dev_read;
+ sector_t start_read;
+ unsigned read_delay;
+ unsigned reads;
+
+ struct dm_dev *dev_write;
+ sector_t start_write;
+ unsigned write_delay;
+ unsigned writes;
+};
+
+struct delay_info {
+ struct delay_c *context;
+ struct list_head list;
+ struct bio *bio;
+ unsigned long expires;
+};
+
+static DEFINE_MUTEX(delayed_bios_lock);
+
+static struct workqueue_struct *kdelayd_wq;
+static struct kmem_cache *delayed_cache;
+
+static void handle_delayed_timer(unsigned long data)
+{
+ struct delay_c *dc = (struct delay_c *)data;
+
+ queue_work(kdelayd_wq, &dc->flush_expired_bios);
+}
+
+static void queue_timeout(struct delay_c *dc, unsigned long expires)
+{
+ down(&dc->timer_lock);
+
+ if (!timer_pending(&dc->delay_timer) || expires < dc->delay_timer.expires)
+ mod_timer(&dc->delay_timer, expires);
+
+ up(&dc->timer_lock);
+}
+
+static void flush_bios(struct bio *bio)
+{
+ struct bio *n;
+
+ while (bio) {
+ n = bio->bi_next;
+ bio->bi_next = NULL;
+ generic_make_request(bio);
+ bio = n;
+ }
+}
+
+static struct bio *flush_delayed_bios(struct delay_c *dc, int flush_all)
+{
+ struct delay_info *delayed, *next;
+ unsigned long next_expires = 0;
+ int start_timer = 0;
+ BIO_LIST(flush_bios);
+
+ mutex_lock(&delayed_bios_lock);
+ list_for_each_entry_safe(delayed, next, &dc->delayed_bios, list) {
+ if (flush_all || time_after_eq(jiffies, delayed->expires)) {
+ list_del(&delayed->list);
+ bio_list_add(&flush_bios, delayed->bio);
+ if ((bio_data_dir(delayed->bio) == WRITE))
+ delayed->context->writes--;
+ else
+ delayed->context->reads--;
+ mempool_free(delayed, dc->delayed_pool);
+ continue;
+ }
+
+ if (!start_timer) {
+ start_timer = 1;
+ next_expires = delayed->expires;
+ } else
+ next_expires = min(next_expires, delayed->expires);
+ }
+
+ mutex_unlock(&delayed_bios_lock);
+
+ if (start_timer)
+ queue_timeout(dc, next_expires);
+
+ return bio_list_get(&flush_bios);
+}
+
+static void flush_expired_bios(struct work_struct *work)
+{
+ struct delay_c *dc;
+
+ dc = container_of(work, struct delay_c, flush_expired_bios);
+ flush_bios(flush_delayed_bios(dc, 0));
+}
+
+/*
+ * Mapping parameters:
+ * <device> <offset> <delay> [<write_device> <write_offset> <write_delay>]
+ *
+ * With separate write parameters, the first set is only used for reads.
+ * Delays are specified in milliseconds.
+ */
+static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+{
+ struct delay_c *dc;
+ unsigned long long tmpll;
+
+ if (argc != 3 && argc != 6) {
+ ti->error = "requires exactly 3 or 6 arguments";
+ return -EINVAL;
+ }
+
+ dc = kmalloc(sizeof(*dc), GFP_KERNEL);
+ if (!dc) {
+ ti->error = "Cannot allocate context";
+ return -ENOMEM;
+ }
+
+ dc->reads = dc->writes = 0;
+
+ if (sscanf(argv[1], "%llu", &tmpll) != 1) {
+ ti->error = "Invalid device sector";
+ goto bad;
+ }
+ dc->start_read = tmpll;
+
+ if (sscanf(argv[2], "%u", &dc->read_delay) != 1) {
+ ti->error = "Invalid delay";
+ goto bad;
+ }
+
+ if (dm_get_device(ti, argv[0], dc->start_read, ti->len,
+ dm_table_get_mode(ti->table), &dc->dev_read)) {
+ ti->error = "Device lookup failed";
+ goto bad;
+ }
+
+ if (argc == 3) {
+ dc->dev_write = NULL;
+ goto out;
+ }
+
+ if (sscanf(argv[4], "%llu", &tmpll) != 1) {
+ ti->error = "Invalid write device sector";
+ goto bad;
+ }
+ dc->start_write = tmpll;
+
+ if (sscanf(argv[5], "%u", &dc->write_delay) != 1) {
+ ti->error = "Invalid write delay";
+ goto bad;
+ }
+
+ if (dm_get_device(ti, argv[3], dc->start_write, ti->len,
+ dm_table_get_mode(ti->table), &dc->dev_write)) {
+ ti->error = "Write device lookup failed";
+ dm_put_device(ti, dc->dev_read);
+ goto bad;
+ }
+
+out:
+ dc->delayed_pool = mempool_create_slab_pool(128, delayed_cache);
+ if (!dc->delayed_pool) {
+ DMERR("Couldn't create delayed bio pool.");
+ goto bad;
+ }
+
+ init_timer(&dc->delay_timer);
+ dc->delay_timer.function = handle_delayed_timer;
+ dc->delay_timer.data = (unsigned long)dc;
+
+ INIT_WORK(&dc->flush_expired_bios, flush_expired_bios);
+ INIT_LIST_HEAD(&dc->delayed_bios);
+ init_MUTEX(&dc->timer_lock);
+ atomic_set(&dc->may_delay, 1);
+
+ ti->private = dc;
+ return 0;
+
+bad:
+ kfree(dc);
+ return -EINVAL;
+}
+
+static void delay_dtr(struct dm_target *ti)
+{
+ struct delay_c *dc = ti->private;
+
+ flush_workqueue(kdelayd_wq);
+
+ dm_put_device(ti, dc->dev_read);
+
+ if (dc->dev_write)
+ dm_put_device(ti, dc->dev_write);
+
+ mempool_destroy(dc->delayed_pool);
+ kfree(dc);
+}
+
+static int delay_bio(struct delay_c *dc, int delay, struct bio *bio)
+{
+ struct delay_info *delayed;
+ unsigned long expires = 0;
+
+ if (!delay || !atomic_read(&dc->may_delay))
+ return 1;
+
+ delayed = mempool_alloc(dc->delayed_pool, GFP_NOIO);
+
+ delayed->context = dc;
+ delayed->bio = bio;
+ delayed->expires = expires = jiffies + (delay * HZ / 1000);
+
+ mutex_lock(&delayed_bios_lock);
+
+ if (bio_data_dir(bio) == WRITE)
+ dc->writes++;
+ else
+ dc->reads++;
+
+ list_add_tail(&delayed->list, &dc->delayed_bios);
+
+ mutex_unlock(&delayed_bios_lock);
+
+ queue_timeout(dc, expires);
+
+ return 0;
+}
+
+static void delay_presuspend(struct dm_target *ti)
+{
+ struct delay_c *dc = ti->private;
+
+ atomic_set(&dc->may_delay, 0);
+ del_timer_sync(&dc->delay_timer);
+ flush_bios(flush_delayed_bios(dc, 1));
+}
+
+static void delay_resume(struct dm_target *ti)
+{
+ struct delay_c *dc = ti->private;
+
+ atomic_set(&dc->may_delay, 1);
+}
+
+static int delay_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ struct delay_c *dc = ti->private;
+
+ if ((bio_data_dir(bio) == WRITE) && (dc->dev_write)) {
+ bio->bi_bdev = dc->dev_write->bdev;
+ bio->bi_sector = dc->start_write +
+ (bio->bi_sector - ti->begin);
+
+ return delay_bio(dc, dc->write_delay, bio);
+ }
+
+ bio->bi_bdev = dc->dev_read->bdev;
+ bio->bi_sector = dc->start_read +
+ (bio->bi_sector - ti->begin);
+
+ return delay_bio(dc, dc->read_delay, bio);
+}
+
+static int delay_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
+{
+ struct delay_c *dc = ti->private;
+ int sz = 0;
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%u %u", dc->reads, dc->writes);
+ break;
+
+ case STATUSTYPE_TABLE:
+ DMEMIT("%s %llu %u", dc->dev_read->name,
+ (unsigned long long) dc->start_read,
+ dc->read_delay);
+ if (dc->dev_write)
+ DMEMIT("%s %llu %u", dc->dev_write->name,
+ (unsigned long long) dc->start_write,
+ dc->write_delay);
+ break;
+ }
+
+ return 0;
+}
+
+static struct target_type delay_target = {
+ .name = "delay",
+ .version = {1, 0, 2},
+ .module = THIS_MODULE,
+ .ctr = delay_ctr,
+ .dtr = delay_dtr,
+ .map = delay_map,
+ .presuspend = delay_presuspend,
+ .resume = delay_resume,
+ .status = delay_status,
+};
+
+static int __init dm_delay_init(void)
+{
+ int r = -ENOMEM;
+
+ kdelayd_wq = create_workqueue("kdelayd");
+ if (!kdelayd_wq) {
+ DMERR("Couldn't start kdelayd");
+ goto bad_queue;
+ }
+
+ delayed_cache = kmem_cache_create("dm-delay",
+ sizeof(struct delay_info),
+ __alignof__(struct delay_info),
+ 0, NULL, NULL);
+ if (!delayed_cache) {
+ DMERR("Couldn't create delayed bio cache.");
+ goto bad_memcache;
+ }
+
+ r = dm_register_target(&delay_target);
+ if (r < 0) {
+ DMERR("register failed %d", r);
+ goto bad_register;
+ }
+
+ return 0;
+
+bad_register:
+ kmem_cache_destroy(delayed_cache);
+bad_memcache:
+ destroy_workqueue(kdelayd_wq);
+bad_queue:
+ return r;
+}
+
+static void __exit dm_delay_exit(void)
+{
+ int r = dm_unregister_target(&delay_target);
+
+ if (r < 0)
+ DMERR("unregister failed %d", r);
+
+ kmem_cache_destroy(delayed_cache);
+ destroy_workqueue(kdelayd_wq);
+}
+
+/* Module hooks */
+module_init(dm_delay_init);
+module_exit(dm_delay_exit);
+
+MODULE_DESCRIPTION(DM_NAME " delay target");
+MODULE_AUTHOR("Heinz Mauelshagen <mauelshagen@redhat.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index 99cdffa7fbf..07e0a0c84f6 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -1,7 +1,8 @@
/*
- * dm-snapshot.c
+ * dm-exception-store.c
*
* Copyright (C) 2001-2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006 Red Hat GmbH
*
* This file is released under the GPL.
*/
@@ -123,6 +124,7 @@ struct pstore {
atomic_t pending_count;
uint32_t callback_count;
struct commit_callback *callbacks;
+ struct dm_io_client *io_client;
};
static inline unsigned int sectors_to_pages(unsigned int sectors)
@@ -159,14 +161,20 @@ static void free_area(struct pstore *ps)
*/
static int chunk_io(struct pstore *ps, uint32_t chunk, int rw)
{
- struct io_region where;
- unsigned long bits;
-
- where.bdev = ps->snap->cow->bdev;
- where.sector = ps->snap->chunk_size * chunk;
- where.count = ps->snap->chunk_size;
-
- return dm_io_sync_vm(1, &where, rw, ps->area, &bits);
+ struct io_region where = {
+ .bdev = ps->snap->cow->bdev,
+ .sector = ps->snap->chunk_size * chunk,
+ .count = ps->snap->chunk_size,
+ };
+ struct dm_io_request io_req = {
+ .bi_rw = rw,
+ .mem.type = DM_IO_VMA,
+ .mem.ptr.vma = ps->area,
+ .client = ps->io_client,
+ .notify.fn = NULL,
+ };
+
+ return dm_io(&io_req, 1, &where, NULL);
}
/*
@@ -213,17 +221,18 @@ static int read_header(struct pstore *ps, int *new_snapshot)
chunk_size_supplied = 0;
}
- r = dm_io_get(sectors_to_pages(ps->snap->chunk_size));
- if (r)
- return r;
+ ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap->
+ chunk_size));
+ if (IS_ERR(ps->io_client))
+ return PTR_ERR(ps->io_client);
r = alloc_area(ps);
if (r)
- goto bad1;
+ return r;
r = chunk_io(ps, 0, READ);
if (r)
- goto bad2;
+ goto bad;
dh = (struct disk_header *) ps->area;
@@ -235,7 +244,7 @@ static int read_header(struct pstore *ps, int *new_snapshot)
if (le32_to_cpu(dh->magic) != SNAP_MAGIC) {
DMWARN("Invalid or corrupt snapshot");
r = -ENXIO;
- goto bad2;
+ goto bad;
}
*new_snapshot = 0;
@@ -252,27 +261,22 @@ static int read_header(struct pstore *ps, int *new_snapshot)
(unsigned long long)ps->snap->chunk_size);
/* We had a bogus chunk_size. Fix stuff up. */
- dm_io_put(sectors_to_pages(ps->snap->chunk_size));
free_area(ps);
ps->snap->chunk_size = chunk_size;
ps->snap->chunk_mask = chunk_size - 1;
ps->snap->chunk_shift = ffs(chunk_size) - 1;
- r = dm_io_get(sectors_to_pages(chunk_size));
+ r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size),
+ ps->io_client);
if (r)
return r;
r = alloc_area(ps);
- if (r)
- goto bad1;
-
- return 0;
+ return r;
-bad2:
+bad:
free_area(ps);
-bad1:
- dm_io_put(sectors_to_pages(ps->snap->chunk_size));
return r;
}
@@ -405,7 +409,7 @@ static void persistent_destroy(struct exception_store *store)
{
struct pstore *ps = get_info(store);
- dm_io_put(sectors_to_pages(ps->snap->chunk_size));
+ dm_io_client_destroy(ps->io_client);
vfree(ps->callbacks);
free_area(ps);
kfree(ps);
diff --git a/drivers/md/dm-hw-handler.h b/drivers/md/dm-hw-handler.h
index 32eff28e4ad..e0832e6fcf3 100644
--- a/drivers/md/dm-hw-handler.h
+++ b/drivers/md/dm-hw-handler.h
@@ -16,6 +16,7 @@
struct hw_handler_type;
struct hw_handler {
struct hw_handler_type *type;
+ struct mapped_device *md;
void *context;
};
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 8bdc8a87b24..352c6fbeac5 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2003 Sistina Software
+ * Copyright (C) 2006 Red Hat GmbH
*
* This file is released under the GPL.
*/
@@ -12,13 +13,17 @@
#include <linux/sched.h>
#include <linux/slab.h>
-static struct bio_set *_bios;
+struct dm_io_client {
+ mempool_t *pool;
+ struct bio_set *bios;
+};
/* FIXME: can we shrink this ? */
struct io {
unsigned long error;
atomic_t count;
struct task_struct *sleeper;
+ struct dm_io_client *client;
io_notify_fn callback;
void *context;
};
@@ -26,63 +31,58 @@ struct io {
/*
* io contexts are only dynamically allocated for asynchronous
* io. Since async io is likely to be the majority of io we'll
- * have the same number of io contexts as buffer heads ! (FIXME:
- * must reduce this).
+ * have the same number of io contexts as bios! (FIXME: must reduce this).
*/
-static unsigned _num_ios;
-static mempool_t *_io_pool;
static unsigned int pages_to_ios(unsigned int pages)
{
return 4 * pages; /* too many ? */
}
-static int resize_pool(unsigned int new_ios)
+/*
+ * Create a client with mempool and bioset.
+ */
+struct dm_io_client *dm_io_client_create(unsigned num_pages)
{
- int r = 0;
-
- if (_io_pool) {
- if (new_ios == 0) {
- /* free off the pool */
- mempool_destroy(_io_pool);
- _io_pool = NULL;
- bioset_free(_bios);
-
- } else {
- /* resize the pool */
- r = mempool_resize(_io_pool, new_ios, GFP_KERNEL);
- }
+ unsigned ios = pages_to_ios(num_pages);
+ struct dm_io_client *client;
- } else {
- /* create new pool */
- _io_pool = mempool_create_kmalloc_pool(new_ios,
- sizeof(struct io));
- if (!_io_pool)
- return -ENOMEM;
-
- _bios = bioset_create(16, 16);
- if (!_bios) {
- mempool_destroy(_io_pool);
- _io_pool = NULL;
- return -ENOMEM;
- }
- }
+ client = kmalloc(sizeof(*client), GFP_KERNEL);
+ if (!client)
+ return ERR_PTR(-ENOMEM);
+
+ client->pool = mempool_create_kmalloc_pool(ios, sizeof(struct io));
+ if (!client->pool)
+ goto bad;
- if (!r)
- _num_ios = new_ios;
+ client->bios = bioset_create(16, 16);
+ if (!client->bios)
+ goto bad;
- return r;
+ return client;
+
+ bad:
+ if (client->pool)
+ mempool_destroy(client->pool);
+ kfree(client);
+ return ERR_PTR(-ENOMEM);
}
+EXPORT_SYMBOL(dm_io_client_create);
-int dm_io_get(unsigned int num_pages)
+int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client)
{
- return resize_pool(_num_ios + pages_to_ios(num_pages));
+ return mempool_resize(client->pool, pages_to_ios(num_pages),
+ GFP_KERNEL);
}
+EXPORT_SYMBOL(dm_io_client_resize);
-void dm_io_put(unsigned int num_pages)
+void dm_io_client_destroy(struct dm_io_client *client)
{
- resize_pool(_num_ios - pages_to_ios(num_pages));
+ mempool_destroy(client->pool);
+ bioset_free(client->bios);
+ kfree(client);
}
+EXPORT_SYMBOL(dm_io_client_destroy);
/*-----------------------------------------------------------------
* We need to keep track of which region a bio is doing io for.
@@ -118,7 +118,7 @@ static void dec_count(struct io *io, unsigned int region, int error)
io_notify_fn fn = io->callback;
void *context = io->context;
- mempool_free(io, _io_pool);
+ mempool_free(io, io->client->pool);
fn(r, context);
}
}
@@ -126,7 +126,8 @@ static void dec_count(struct io *io, unsigned int region, int error)
static int endio(struct bio *bio, unsigned int done, int error)
{
- struct io *io = (struct io *) bio->bi_private;
+ struct io *io;
+ unsigned region;
/* keep going until we've finished */
if (bio->bi_size)
@@ -135,10 +136,17 @@ static int endio(struct bio *bio, unsigned int done, int error)
if (error && bio_data_dir(bio) == READ)
zero_fill_bio(bio);
- dec_count(io, bio_get_region(bio), error);
+ /*
+ * The bio destructor in bio_put() may use the io object.
+ */
+ io = bio->bi_private;
+ region = bio_get_region(bio);
+
bio->bi_max_vecs++;
bio_put(bio);
+ dec_count(io, region, error);
+
return 0;
}
@@ -209,6 +217,9 @@ static void bvec_dp_init(struct dpages *dp, struct bio_vec *bvec)
dp->context_ptr = bvec;
}
+/*
+ * Functions for getting the pages from a VMA.
+ */
static void vm_get_page(struct dpages *dp,
struct page **p, unsigned long *len, unsigned *offset)
{
@@ -233,7 +244,34 @@ static void vm_dp_init(struct dpages *dp, void *data)
static void dm_bio_destructor(struct bio *bio)
{
- bio_free(bio, _bios);
+ struct io *io = bio->bi_private;
+
+ bio_free(bio, io->client->bios);
+}
+
+/*
+ * Functions for getting the pages from kernel memory.
+ */
+static void km_get_page(struct dpages *dp, struct page **p, unsigned long *len,
+ unsigned *offset)
+{
+ *p = virt_to_page(dp->context_ptr);
+ *offset = dp->context_u;
+ *len = PAGE_SIZE - dp->context_u;
+}
+
+static void km_next_page(struct dpages *dp)
+{
+ dp->context_ptr += PAGE_SIZE - dp->context_u;
+ dp->context_u = 0;
+}
+
+static void km_dp_init(struct dpages *dp, void *data)
+{
+ dp->get_page = km_get_page;
+ dp->next_page = km_next_page;
+ dp->context_u = ((unsigned long) data) & (PAGE_SIZE - 1);
+ dp->context_ptr = data;
}
/*-----------------------------------------------------------------
@@ -256,7 +294,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where,
* to hide it from bio_add_page().
*/
num_bvecs = (remaining / (PAGE_SIZE >> SECTOR_SHIFT)) + 2;
- bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, _bios);
+ bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, io->client->bios);
bio->bi_sector = where->sector + (where->count - remaining);
bio->bi_bdev = where->bdev;
bio->bi_end_io = endio;
@@ -311,8 +349,9 @@ static void dispatch_io(int rw, unsigned int num_regions,
dec_count(io, 0, 0);
}
-static int sync_io(unsigned int num_regions, struct io_region *where,
- int rw, struct dpages *dp, unsigned long *error_bits)
+static int sync_io(struct dm_io_client *client, unsigned int num_regions,
+ struct io_region *where, int rw, struct dpages *dp,
+ unsigned long *error_bits)
{
struct io io;
@@ -324,6 +363,7 @@ static int sync_io(unsigned int num_regions, struct io_region *where,
io.error = 0;
atomic_set(&io.count, 1); /* see dispatch_io() */
io.sleeper = current;
+ io.client = client;
dispatch_io(rw, num_regions, where, dp, &io, 1);
@@ -340,12 +380,15 @@ static int sync_io(unsigned int num_regions, struct io_region *where,
if (atomic_read(&io.count))
return -EINTR;
- *error_bits = io.error;
+ if (error_bits)
+ *error_bits = io.error;
+
return io.error ? -EIO : 0;
}
-static int async_io(unsigned int num_regions, struct io_region *where, int rw,
- struct dpages *dp, io_notify_fn fn, void *context)
+static int async_io(struct dm_io_client *client, unsigned int num_regions,
+ struct io_region *where, int rw, struct dpages *dp,
+ io_notify_fn fn, void *context)
{
struct io *io;
@@ -355,10 +398,11 @@ static int async_io(unsigned int num_regions, struct io_region *where, int rw,
return -EIO;
}
- io = mempool_alloc(_io_pool, GFP_NOIO);
+ io = mempool_alloc(client->pool, GFP_NOIO);
io->error = 0;
atomic_set(&io->count, 1); /* see dispatch_io() */
io->sleeper = NULL;
+ io->client = client;
io->callback = fn;
io->context = context;
@@ -366,61 +410,51 @@ static int async_io(unsigned int num_regions, struct io_region *where, int rw,
return 0;
}
-int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- unsigned long *error_bits)
+static int dp_init(struct dm_io_request *io_req, struct dpages *dp)
{
- struct dpages dp;
- list_dp_init(&dp, pl, offset);
- return sync_io(num_regions, where, rw, &dp, error_bits);
-}
+ /* Set up dpages based on memory type */
+ switch (io_req->mem.type) {
+ case DM_IO_PAGE_LIST:
+ list_dp_init(dp, io_req->mem.ptr.pl, io_req->mem.offset);
+ break;
+
+ case DM_IO_BVEC:
+ bvec_dp_init(dp, io_req->mem.ptr.bvec);
+ break;
+
+ case DM_IO_VMA:
+ vm_dp_init(dp, io_req->mem.ptr.vma);
+ break;
+
+ case DM_IO_KMEM:
+ km_dp_init(dp, io_req->mem.ptr.addr);
+ break;
+
+ default:
+ return -EINVAL;
+ }
-int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, unsigned long *error_bits)
-{
- struct dpages dp;
- bvec_dp_init(&dp, bvec);
- return sync_io(num_regions, where, rw, &dp, error_bits);
+ return 0;
}
-int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, unsigned long *error_bits)
+/*
+ * New collapsed (a)synchronous interface
+ */
+int dm_io(struct dm_io_request *io_req, unsigned num_regions,
+ struct io_region *where, unsigned long *sync_error_bits)
{
+ int r;
struct dpages dp;
- vm_dp_init(&dp, data);
- return sync_io(num_regions, where, rw, &dp, error_bits);
-}
-int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- io_notify_fn fn, void *context)
-{
- struct dpages dp;
- list_dp_init(&dp, pl, offset);
- return async_io(num_regions, where, rw, &dp, fn, context);
-}
+ r = dp_init(io_req, &dp);
+ if (r)
+ return r;
-int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, io_notify_fn fn, void *context)
-{
- struct dpages dp;
- bvec_dp_init(&dp, bvec);
- return async_io(num_regions, where, rw, &dp, fn, context);
-}
+ if (!io_req->notify.fn)
+ return sync_io(io_req->client, num_regions, where,
+ io_req->bi_rw, &dp, sync_error_bits);
-int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, io_notify_fn fn, void *context)
-{
- struct dpages dp;
- vm_dp_init(&dp, data);
- return async_io(num_regions, where, rw, &dp, fn, context);
+ return async_io(io_req->client, num_regions, where, io_req->bi_rw,
+ &dp, io_req->notify.fn, io_req->notify.context);
}
-
-EXPORT_SYMBOL(dm_io_get);
-EXPORT_SYMBOL(dm_io_put);
-EXPORT_SYMBOL(dm_io_sync);
-EXPORT_SYMBOL(dm_io_async);
-EXPORT_SYMBOL(dm_io_sync_bvec);
-EXPORT_SYMBOL(dm_io_async_bvec);
-EXPORT_SYMBOL(dm_io_sync_vm);
-EXPORT_SYMBOL(dm_io_async_vm);
+EXPORT_SYMBOL(dm_io);
diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h
index f9035bfd1a9..f647e2cceaa 100644
--- a/drivers/md/dm-io.h
+++ b/drivers/md/dm-io.h
@@ -12,7 +12,7 @@
struct io_region {
struct block_device *bdev;
sector_t sector;
- sector_t count;
+ sector_t count; /* If this is zero the region is ignored. */
};
struct page_list {
@@ -20,55 +20,60 @@ struct page_list {
struct page *page;
};
-
-/*
- * 'error' is a bitset, with each bit indicating whether an error
- * occurred doing io to the corresponding region.
- */
typedef void (*io_notify_fn)(unsigned long error, void *context);
+enum dm_io_mem_type {
+ DM_IO_PAGE_LIST,/* Page list */
+ DM_IO_BVEC, /* Bio vector */
+ DM_IO_VMA, /* Virtual memory area */
+ DM_IO_KMEM, /* Kernel memory */
+};
+
+struct dm_io_memory {
+ enum dm_io_mem_type type;
+
+ union {
+ struct page_list *pl;
+ struct bio_vec *bvec;
+ void *vma;
+ void *addr;
+ } ptr;
+
+ unsigned offset;
+};
+
+struct dm_io_notify {
+ io_notify_fn fn; /* Callback for asynchronous requests */
+ void *context; /* Passed to callback */
+};
/*
- * Before anyone uses the IO interface they should call
- * dm_io_get(), specifying roughly how many pages they are
- * expecting to perform io on concurrently.
- *
- * This function may block.
+ * IO request structure
*/
-int dm_io_get(unsigned int num_pages);
-void dm_io_put(unsigned int num_pages);
+struct dm_io_client;
+struct dm_io_request {
+ int bi_rw; /* READ|WRITE - not READA */
+ struct dm_io_memory mem; /* Memory to use for io */
+ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */
+ struct dm_io_client *client; /* Client memory handler */
+};
/*
- * Synchronous IO.
+ * For async io calls, users can alternatively use the dm_io() function below
+ * and dm_io_client_create() to create private mempools for the client.
*
- * Please ensure that the rw flag in the next two functions is
- * either READ or WRITE, ie. we don't take READA. Any
- * regions with a zero count field will be ignored.
+ * Create/destroy may block.
*/
-int dm_io_sync(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- unsigned long *error_bits);
-
-int dm_io_sync_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, unsigned long *error_bits);
-
-int dm_io_sync_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, unsigned long *error_bits);
+struct dm_io_client *dm_io_client_create(unsigned num_pages);
+int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client);
+void dm_io_client_destroy(struct dm_io_client *client);
/*
- * Aynchronous IO.
- *
- * The 'where' array may be safely allocated on the stack since
- * the function takes a copy.
+ * IO interface using private per-client pools.
+ * Each bit in the optional 'sync_error_bits' bitset indicates whether an
+ * error occurred doing io to the corresponding region.
*/
-int dm_io_async(unsigned int num_regions, struct io_region *where, int rw,
- struct page_list *pl, unsigned int offset,
- io_notify_fn fn, void *context);
-
-int dm_io_async_bvec(unsigned int num_regions, struct io_region *where, int rw,
- struct bio_vec *bvec, io_notify_fn fn, void *context);
-
-int dm_io_async_vm(unsigned int num_regions, struct io_region *where, int rw,
- void *data, io_notify_fn fn, void *context);
+int dm_io(struct dm_io_request *io_req, unsigned num_regions,
+ struct io_region *region, unsigned long *sync_error_bits);
#endif
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index 6a926135184..a66428d860f 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -149,9 +149,12 @@ struct log_c {
FORCESYNC, /* Force a sync to happen */
} sync;
+ struct dm_io_request io_req;
+
/*
* Disk log fields
*/
+ int log_dev_failed;
struct dm_dev *log_dev;
struct log_header header;
@@ -199,13 +202,20 @@ static void header_from_disk(struct log_header *core, struct log_header *disk)
core->nr_regions = le64_to_cpu(disk->nr_regions);
}
+static int rw_header(struct log_c *lc, int rw)
+{
+ lc->io_req.bi_rw = rw;
+ lc->io_req.mem.ptr.vma = lc->disk_header;
+ lc->io_req.notify.fn = NULL;
+
+ return dm_io(&lc->io_req, 1, &lc->header_location, NULL);
+}
+
static int read_header(struct log_c *log)
{
int r;
- unsigned long ebits;
- r = dm_io_sync_vm(1, &log->header_location, READ,
- log->disk_header, &ebits);
+ r = rw_header(log, READ);
if (r)
return r;
@@ -233,11 +243,8 @@ static int read_header(struct log_c *log)
static inline int write_header(struct log_c *log)
{
- unsigned long ebits;
-
header_to_disk(&log->header, log->disk_header);
- return dm_io_sync_vm(1, &log->header_location, WRITE,
- log->disk_header, &ebits);
+ return rw_header(log, WRITE);
}
/*----------------------------------------------------------------
@@ -256,6 +263,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
uint32_t region_size;
unsigned int region_count;
size_t bitset_size, buf_size;
+ int r;
if (argc < 1 || argc > 2) {
DMWARN("wrong number of arguments to mirror log");
@@ -315,6 +323,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
lc->disk_header = NULL;
} else {
lc->log_dev = dev;
+ lc->log_dev_failed = 0;
lc->header_location.bdev = lc->log_dev->bdev;
lc->header_location.sector = 0;
@@ -324,6 +333,15 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti,
buf_size = dm_round_up((LOG_OFFSET << SECTOR_SHIFT) +
bitset_size, ti->limits.hardsect_size);
lc->header_location.count = buf_size >> SECTOR_SHIFT;
+ lc->io_req.mem.type = DM_IO_VMA;
+ lc->io_req.client = dm_io_client_create(dm_div_up(buf_size,
+ PAGE_SIZE));
+ if (IS_ERR(lc->io_req.client)) {
+ r = PTR_ERR(lc->io_req.client);
+ DMWARN("couldn't allocate disk io client");
+ kfree(lc);
+ return -ENOMEM;
+ }
lc->disk_header = vmalloc(buf_size);
if (!lc->disk_header) {
@@ -424,6 +442,7 @@ static void disk_dtr(struct dirty_log *log)
dm_put_device(lc->ti, lc->log_dev);
vfree(lc->disk_header);
+ dm_io_client_destroy(lc->io_req.client);
destroy_log_context(lc);
}
@@ -437,6 +456,15 @@ static int count_bits32(uint32_t *addr, unsigned size)
return count;
}
+static void fail_log_device(struct log_c *lc)
+{
+ if (lc->log_dev_failed)
+ return;
+
+ lc->log_dev_failed = 1;
+ dm_table_event(lc->ti->table);
+}
+
static int disk_resume(struct dirty_log *log)
{
int r;
@@ -446,8 +474,19 @@ static int disk_resume(struct dirty_log *log)
/* read the disk header */
r = read_header(lc);
- if (r)
- return r;
+ if (r) {
+ DMWARN("%s: Failed to read header on mirror log device",
+ lc->log_dev->name);
+ fail_log_device(lc);
+ /*
+ * If the log device cannot be read, we must assume
+ * all regions are out-of-sync. If we simply return
+ * here, the state will be uninitialized and could
+ * lead us to return 'in-sync' status for regions
+ * that are actually 'out-of-sync'.
+ */
+ lc->header.nr_regions = 0;
+ }
/* set or clear any new bits -- device has grown */
if (lc->sync == NOSYNC)
@@ -472,7 +511,14 @@ static int disk_resume(struct dirty_log *log)
lc->header.nr_regions = lc->region_count;
/* write the new header */
- return write_header(lc);
+ r = write_header(lc);
+ if (r) {
+ DMWARN("%s: Failed to write header on mirror log device",
+ lc->log_dev->name);
+ fail_log_device(lc);
+ }
+
+ return r;
}
static uint32_t core_get_region_size(struct dirty_log *log)
@@ -516,7 +562,9 @@ static int disk_flush(struct dirty_log *log)
return 0;
r = write_header(lc);
- if (!r)
+ if (r)
+ fail_log_device(lc);
+ else
lc->touched = 0;
return r;
@@ -591,6 +639,7 @@ static int core_status(struct dirty_log *log, status_type_t status,
switch(status) {
case STATUSTYPE_INFO:
+ DMEMIT("1 %s", log->type->name);
break;
case STATUSTYPE_TABLE:
@@ -606,17 +655,17 @@ static int disk_status(struct dirty_log *log, status_type_t status,
char *result, unsigned int maxlen)
{
int sz = 0;
- char buffer[16];
struct log_c *lc = log->context;
switch(status) {
case STATUSTYPE_INFO:
+ DMEMIT("3 %s %s %c", log->type->name, lc->log_dev->name,
+ lc->log_dev_failed ? 'D' : 'A');
break;
case STATUSTYPE_TABLE:
- format_dev_t(buffer, lc->log_dev->bdev->bd_dev);
DMEMIT("%s %u %s %u ", log->type->name,
- lc->sync == DEFAULTSYNC ? 2 : 3, buffer,
+ lc->sync == DEFAULTSYNC ? 2 : 3, lc->log_dev->name,
lc->region_size);
DMEMIT_SYNC;
}
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 3aa01350696..de54b39e6ff 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -668,6 +668,9 @@ static int parse_hw_handler(struct arg_set *as, struct multipath *m)
return -EINVAL;
}
+ m->hw_handler.md = dm_table_get_md(ti->table);
+ dm_put(m->hw_handler.md);
+
r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
if (r) {
dm_put_hw_handler(hwht);
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 23a642619be..ef124b71ccc 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -21,15 +21,11 @@
#include <linux/workqueue.h>
#define DM_MSG_PREFIX "raid1"
+#define DM_IO_PAGES 64
-static struct workqueue_struct *_kmirrord_wq;
-static struct work_struct _kmirrord_work;
-static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
+#define DM_RAID1_HANDLE_ERRORS 0x01
-static inline void wake(void)
-{
- queue_work(_kmirrord_wq, &_kmirrord_work);
-}
+static DECLARE_WAIT_QUEUE_HEAD(_kmirrord_recovery_stopped);
/*-----------------------------------------------------------------
* Region hash
@@ -125,17 +121,23 @@ struct mirror_set {
struct list_head list;
struct region_hash rh;
struct kcopyd_client *kcopyd_client;
+ uint64_t features;
spinlock_t lock; /* protects the next two lists */
struct bio_list reads;
struct bio_list writes;
+ struct dm_io_client *io_client;
+
/* recovery */
region_t nr_regions;
int in_sync;
struct mirror *default_mirror; /* Default mirror */
+ struct workqueue_struct *kmirrord_wq;
+ struct work_struct kmirrord_work;
+
unsigned int nr_mirrors;
struct mirror mirror[0];
};
@@ -153,6 +155,11 @@ static inline sector_t region_to_sector(struct region_hash *rh, region_t region)
return region << rh->region_shift;
}
+static void wake(struct mirror_set *ms)
+{
+ queue_work(ms->kmirrord_wq, &ms->kmirrord_work);
+}
+
/* FIXME move this */
static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw);
@@ -398,8 +405,7 @@ static void rh_update_states(struct region_hash *rh)
mempool_free(reg, rh->region_pool);
}
- if (!list_empty(&recovered))
- rh->log->type->flush(rh->log);
+ rh->log->type->flush(rh->log);
list_for_each_entry_safe (reg, next, &clean, list)
mempool_free(reg, rh->region_pool);
@@ -471,7 +477,7 @@ static void rh_dec(struct region_hash *rh, region_t region)
spin_unlock_irqrestore(&rh->region_lock, flags);
if (should_wake)
- wake();
+ wake(rh->ms);
}
/*
@@ -558,7 +564,7 @@ static void rh_recovery_end(struct region *reg, int success)
list_add(&reg->list, &reg->rh->recovered_regions);
spin_unlock_irq(&rh->region_lock);
- wake();
+ wake(rh->ms);
}
static void rh_flush(struct region_hash *rh)
@@ -592,7 +598,7 @@ static void rh_start_recovery(struct region_hash *rh)
for (i = 0; i < MAX_RECOVERY; i++)
up(&rh->recovery_count);
- wake();
+ wake(rh->ms);
}
/*
@@ -735,7 +741,7 @@ static void do_reads(struct mirror_set *ms, struct bio_list *reads)
/*
* We can only read balance if the region is in sync.
*/
- if (rh_in_sync(&ms->rh, region, 0))
+ if (rh_in_sync(&ms->rh, region, 1))
m = choose_mirror(ms, bio->bi_sector);
else
m = ms->default_mirror;
@@ -792,6 +798,14 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
unsigned int i;
struct io_region io[KCOPYD_MAX_REGIONS+1];
struct mirror *m;
+ struct dm_io_request io_req = {
+ .bi_rw = WRITE,
+ .mem.type = DM_IO_BVEC,
+ .mem.ptr.bvec = bio->bi_io_vec + bio->bi_idx,
+ .notify.fn = write_callback,
+ .notify.context = bio,
+ .client = ms->io_client,
+ };
for (i = 0; i < ms->nr_mirrors; i++) {
m = ms->mirror + i;
@@ -802,9 +816,8 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
}
bio_set_ms(bio, ms);
- dm_io_async_bvec(ms->nr_mirrors, io, WRITE,
- bio->bi_io_vec + bio->bi_idx,
- write_callback, bio);
+
+ (void) dm_io(&io_req, ms->nr_mirrors, io, NULL);
}
static void do_writes(struct mirror_set *ms, struct bio_list *writes)
@@ -870,11 +883,10 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
/*-----------------------------------------------------------------
* kmirrord
*---------------------------------------------------------------*/
-static LIST_HEAD(_mirror_sets);
-static DECLARE_RWSEM(_mirror_sets_lock);
-
-static void do_mirror(struct mirror_set *ms)
+static void do_mirror(struct work_struct *work)
{
+ struct mirror_set *ms =container_of(work, struct mirror_set,
+ kmirrord_work);
struct bio_list reads, writes;
spin_lock(&ms->lock);
@@ -890,16 +902,6 @@ static void do_mirror(struct mirror_set *ms)
do_writes(ms, &writes);
}
-static void do_work(struct work_struct *ignored)
-{
- struct mirror_set *ms;
-
- down_read(&_mirror_sets_lock);
- list_for_each_entry (ms, &_mirror_sets, list)
- do_mirror(ms);
- up_read(&_mirror_sets_lock);
-}
-
/*-----------------------------------------------------------------
* Target functions
*---------------------------------------------------------------*/
@@ -931,6 +933,13 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
ms->in_sync = 0;
ms->default_mirror = &ms->mirror[DEFAULT_MIRROR];
+ ms->io_client = dm_io_client_create(DM_IO_PAGES);
+ if (IS_ERR(ms->io_client)) {
+ ti->error = "Error creating dm_io client";
+ kfree(ms);
+ return NULL;
+ }
+
if (rh_init(&ms->rh, ms, dl, region_size, ms->nr_regions)) {
ti->error = "Error creating dirty region hash";
kfree(ms);
@@ -946,6 +955,7 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti,
while (m--)
dm_put_device(ti, ms->mirror[m].dev);
+ dm_io_client_destroy(ms->io_client);
rh_exit(&ms->rh);
kfree(ms);
}
@@ -978,23 +988,6 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
return 0;
}
-static int add_mirror_set(struct mirror_set *ms)
-{
- down_write(&_mirror_sets_lock);
- list_add_tail(&ms->list, &_mirror_sets);
- up_write(&_mirror_sets_lock);
- wake();
-
- return 0;
-}
-
-static void del_mirror_set(struct mirror_set *ms)
-{
- down_write(&_mirror_sets_lock);
- list_del(&ms->list);
- up_write(&_mirror_sets_lock);
-}
-
/*
* Create dirty log: log_type #log_params <log_params>
*/
@@ -1037,16 +1030,55 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti,
return dl;
}
+static int parse_features(struct mirror_set *ms, unsigned argc, char **argv,
+ unsigned *args_used)
+{
+ unsigned num_features;
+ struct dm_target *ti = ms->ti;
+
+ *args_used = 0;
+
+ if (!argc)
+ return 0;
+
+ if (sscanf(argv[0], "%u", &num_features) != 1) {
+ ti->error = "Invalid number of features";
+ return -EINVAL;
+ }
+
+ argc--;
+ argv++;
+ (*args_used)++;
+
+ if (num_features > argc) {
+ ti->error = "Not enough arguments to support feature count";
+ return -EINVAL;
+ }
+
+ if (!strcmp("handle_errors", argv[0]))
+ ms->features |= DM_RAID1_HANDLE_ERRORS;
+ else {
+ ti->error = "Unrecognised feature requested";
+ return -EINVAL;
+ }
+
+ (*args_used)++;
+
+ return 0;
+}
+
/*
* Construct a mirror mapping:
*
* log_type #log_params <log_params>
* #mirrors [mirror_path offset]{2,}
+ * [#features <features>]
*
* log_type is "core" or "disk"
* #log_params is between 1 and 3
+ *
+ * If present, features must be "handle_errors".
*/
-#define DM_IO_PAGES 64
static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
int r;
@@ -1070,8 +1102,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
argv++, argc--;
- if (argc != nr_mirrors * 2) {
- ti->error = "Wrong number of mirror arguments";
+ if (argc < nr_mirrors * 2) {
+ ti->error = "Too few mirror arguments";
dm_destroy_dirty_log(dl);
return -EINVAL;
}
@@ -1096,13 +1128,37 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->private = ms;
ti->split_io = ms->rh.region_size;
+ ms->kmirrord_wq = create_singlethread_workqueue("kmirrord");
+ if (!ms->kmirrord_wq) {
+ DMERR("couldn't start kmirrord");
+ free_context(ms, ti, m);
+ return -ENOMEM;
+ }
+ INIT_WORK(&ms->kmirrord_work, do_mirror);
+
+ r = parse_features(ms, argc, argv, &args_used);
+ if (r) {
+ free_context(ms, ti, ms->nr_mirrors);
+ return r;
+ }
+
+ argv += args_used;
+ argc -= args_used;
+
+ if (argc) {
+ ti->error = "Too many mirror arguments";
+ free_context(ms, ti, ms->nr_mirrors);
+ return -EINVAL;
+ }
+
r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client);
if (r) {
+ destroy_workqueue(ms->kmirrord_wq);
free_context(ms, ti, ms->nr_mirrors);
return r;
}
- add_mirror_set(ms);
+ wake(ms);
return 0;
}
@@ -1110,8 +1166,9 @@ static void mirror_dtr(struct dm_target *ti)
{
struct mirror_set *ms = (struct mirror_set *) ti->private;
- del_mirror_set(ms);
+ flush_workqueue(ms->kmirrord_wq);
kcopyd_client_destroy(ms->kcopyd_client);
+ destroy_workqueue(ms->kmirrord_wq);
free_context(ms, ti, ms->nr_mirrors);
}
@@ -1127,7 +1184,7 @@ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
spin_unlock(&ms->lock);
if (should_wake)
- wake();
+ wake(ms);
}
/*
@@ -1222,11 +1279,9 @@ static void mirror_resume(struct dm_target *ti)
static int mirror_status(struct dm_target *ti, status_type_t type,
char *result, unsigned int maxlen)
{
- unsigned int m, sz;
+ unsigned int m, sz = 0;
struct mirror_set *ms = (struct mirror_set *) ti->private;
- sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen);
-
switch (type) {
case STATUSTYPE_INFO:
DMEMIT("%d ", ms->nr_mirrors);
@@ -1237,13 +1292,21 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
(unsigned long long)ms->rh.log->type->
get_sync_count(ms->rh.log),
(unsigned long long)ms->nr_regions);
+
+ sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen);
+
break;
case STATUSTYPE_TABLE:
+ sz = ms->rh.log->type->status(ms->rh.log, type, result, maxlen);
+
DMEMIT("%d", ms->nr_mirrors);
for (m = 0; m < ms->nr_mirrors; m++)
DMEMIT(" %s %llu", ms->mirror[m].dev->name,
(unsigned long long)ms->mirror[m].offset);
+
+ if (ms->features & DM_RAID1_HANDLE_ERRORS)
+ DMEMIT(" 1 handle_errors");
}
return 0;
@@ -1251,7 +1314,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
static struct target_type mirror_target = {
.name = "mirror",
- .version = {1, 0, 2},
+ .version = {1, 0, 3},
.module = THIS_MODULE,
.ctr = mirror_ctr,
.dtr = mirror_dtr,
@@ -1270,20 +1333,11 @@ static int __init dm_mirror_init(void)
if (r)
return r;
- _kmirrord_wq = create_singlethread_workqueue("kmirrord");
- if (!_kmirrord_wq) {
- DMERR("couldn't start kmirrord");
- dm_dirty_log_exit();
- return r;
- }
- INIT_WORK(&_kmirrord_work, do_work);
-
r = dm_register_target(&mirror_target);
if (r < 0) {
DMERR("%s: Failed to register mirror target",
mirror_target.name);
dm_dirty_log_exit();
- destroy_workqueue(_kmirrord_wq);
}
return r;
@@ -1297,7 +1351,6 @@ static void __exit dm_mirror_exit(void)
if (r < 0)
DMERR("%s: unregister failed %d", mirror_target.name, r);
- destroy_workqueue(_kmirrord_wq);
dm_dirty_log_exit();
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 05befa91807..2fc199b0016 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -425,13 +425,15 @@ static void close_dev(struct dm_dev *d, struct mapped_device *md)
}
/*
- * If possible (ie. blk_size[major] is set), this checks an area
- * of a destination device is valid.
+ * If possible, this checks an area of a destination device is valid.
*/
static int check_device_area(struct dm_dev *dd, sector_t start, sector_t len)
{
- sector_t dev_size;
- dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT;
+ sector_t dev_size = dd->bdev->bd_inode->i_size >> SECTOR_SHIFT;
+
+ if (!dev_size)
+ return 1;
+
return ((start < dev_size) && (len <= (dev_size - start)));
}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 11a98df298e..2717a355dc5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1236,6 +1236,7 @@ void dm_put(struct mapped_device *md)
free_dev(md);
}
}
+EXPORT_SYMBOL_GPL(dm_put);
/*
* Process the deferred bios
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index b46f6c575f7..dbc234e3c69 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -1,5 +1,6 @@
/*
* Copyright (C) 2002 Sistina Software (UK) Limited.
+ * Copyright (C) 2006 Red Hat GmbH
*
* This file is released under the GPL.
*
@@ -45,6 +46,8 @@ struct kcopyd_client {
unsigned int nr_pages;
unsigned int nr_free_pages;
+ struct dm_io_client *io_client;
+
wait_queue_head_t destroyq;
atomic_t nr_jobs;
};
@@ -342,16 +345,20 @@ static void complete_io(unsigned long error, void *context)
static int run_io_job(struct kcopyd_job *job)
{
int r;
+ struct dm_io_request io_req = {
+ .bi_rw = job->rw,
+ .mem.type = DM_IO_PAGE_LIST,
+ .mem.ptr.pl = job->pages,
+ .mem.offset = job->offset,
+ .notify.fn = complete_io,
+ .notify.context = job,
+ .client = job->kc->io_client,
+ };
if (job->rw == READ)
- r = dm_io_async(1, &job->source, job->rw,
- job->pages,
- job->offset, complete_io, job);
-
+ r = dm_io(&io_req, 1, &job->source, NULL);
else
- r = dm_io_async(job->num_dests, job->dests, job->rw,
- job->pages,
- job->offset, complete_io, job);
+ r = dm_io(&io_req, job->num_dests, job->dests, NULL);
return r;
}
@@ -670,8 +677,9 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result)
return r;
}
- r = dm_io_get(nr_pages);
- if (r) {
+ kc->io_client = dm_io_client_create(nr_pages);
+ if (IS_ERR(kc->io_client)) {
+ r = PTR_ERR(kc->io_client);
client_free_pages(kc);
kfree(kc);
kcopyd_exit();
@@ -691,7 +699,7 @@ void kcopyd_client_destroy(struct kcopyd_client *kc)
/* Wait for completion of all jobs submitted by this client. */
wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs));
- dm_io_put(kc->nr_pages);
+ dm_io_client_destroy(kc->io_client);
client_free_pages(kc);
client_del(kc);
kfree(kc);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2b4315d7e5d..2901d0c0ee9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -33,6 +33,7 @@
*/
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/linkage.h>
#include <linux/raid/md.h>
@@ -273,6 +274,7 @@ static mddev_t * mddev_find(dev_t unit)
atomic_set(&new->active, 1);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
+ new->reshape_position = MaxSector;
new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
@@ -589,14 +591,41 @@ abort:
return ret;
}
+
+static u32 md_csum_fold(u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (csum & 0xffff) + (csum >> 16);
+}
+
static unsigned int calc_sb_csum(mdp_super_t * sb)
{
+ u64 newcsum = 0;
+ u32 *sb32 = (u32*)sb;
+ int i;
unsigned int disk_csum, csum;
disk_csum = sb->sb_csum;
sb->sb_csum = 0;
- csum = csum_partial((void *)sb, MD_SB_BYTES, 0);
+
+ for (i = 0; i < MD_SB_BYTES/4 ; i++)
+ newcsum += sb32[i];
+ csum = (newcsum & 0xffffffff) + (newcsum>>32);
+
+
+#ifdef CONFIG_ALPHA
+ /* This used to use csum_partial, which was wrong for several
+ * reasons including that different results are returned on
+ * different architectures. It isn't critical that we get exactly
+ * the same return value as before (we always csum_fold before
+ * testing, and that removes any differences). However as we
+ * know that csum_partial always returned a 16bit value on
+ * alphas, do a fold to maximise conformity to previous behaviour.
+ */
+ sb->sb_csum = md_csum_fold(disk_csum);
+#else
sb->sb_csum = disk_csum;
+#endif
return csum;
}
@@ -684,7 +713,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
if (sb->raid_disks <= 0)
goto abort;
- if (csum_fold(calc_sb_csum(sb)) != csum_fold(sb->sb_csum)) {
+ if (md_csum_fold(calc_sb_csum(sb)) != md_csum_fold(sb->sb_csum)) {
printk(KERN_WARNING "md: invalid superblock checksum on %s\n",
b);
goto abort;
@@ -694,6 +723,17 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version
rdev->data_offset = 0;
rdev->sb_size = MD_SB_BYTES;
+ if (sb->state & (1<<MD_SB_BITMAP_PRESENT)) {
+ if (sb->level != 1 && sb->level != 4
+ && sb->level != 5 && sb->level != 6
+ && sb->level != 10) {
+ /* FIXME use a better test */
+ printk(KERN_WARNING
+ "md: bitmaps not supported for this level.\n");
+ goto abort;
+ }
+ }
+
if (sb->level == LEVEL_MULTIPATH)
rdev->desc_nr = -1;
else
@@ -792,16 +832,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->max_disks = MD_SB_DISKS;
if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
- mddev->bitmap_file == NULL) {
- if (mddev->level != 1 && mddev->level != 4
- && mddev->level != 5 && mddev->level != 6
- && mddev->level != 10) {
- /* FIXME use a better test */
- printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
- return -EINVAL;
- }
+ mddev->bitmap_file == NULL)
mddev->bitmap_offset = mddev->default_bitmap_offset;
- }
} else if (mddev->pers == NULL) {
/* Insist on good event counter while assembling */
@@ -1058,6 +1090,18 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
bdevname(rdev->bdev,b));
return -EINVAL;
}
+ if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET)) {
+ if (sb->level != cpu_to_le32(1) &&
+ sb->level != cpu_to_le32(4) &&
+ sb->level != cpu_to_le32(5) &&
+ sb->level != cpu_to_le32(6) &&
+ sb->level != cpu_to_le32(10)) {
+ printk(KERN_WARNING
+ "md: bitmaps not supported for this level.\n");
+ return -EINVAL;
+ }
+ }
+
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
@@ -1141,14 +1185,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
mddev->max_disks = (4096-256)/2;
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
- mddev->bitmap_file == NULL ) {
- if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
- && mddev->level != 10) {
- printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
- return -EINVAL;
- }
+ mddev->bitmap_file == NULL )
mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
- }
+
if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
mddev->reshape_position = le64_to_cpu(sb->reshape_position);
mddev->delta_disks = le32_to_cpu(sb->delta_disks);
@@ -2204,6 +2243,10 @@ static ssize_t
layout_show(mddev_t *mddev, char *page)
{
/* just a number, not meaningful for all levels */
+ if (mddev->reshape_position != MaxSector &&
+ mddev->layout != mddev->new_layout)
+ return sprintf(page, "%d (%d)\n",
+ mddev->new_layout, mddev->layout);
return sprintf(page, "%d\n", mddev->layout);
}
@@ -2212,13 +2255,16 @@ layout_store(mddev_t *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
- if (mddev->pers)
- return -EBUSY;
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
- mddev->layout = n;
+ if (mddev->pers)
+ return -EBUSY;
+ if (mddev->reshape_position != MaxSector)
+ mddev->new_layout = n;
+ else
+ mddev->layout = n;
return len;
}
static struct md_sysfs_entry md_layout =
@@ -2230,6 +2276,10 @@ raid_disks_show(mddev_t *mddev, char *page)
{
if (mddev->raid_disks == 0)
return 0;
+ if (mddev->reshape_position != MaxSector &&
+ mddev->delta_disks != 0)
+ return sprintf(page, "%d (%d)\n", mddev->raid_disks,
+ mddev->raid_disks - mddev->delta_disks);
return sprintf(page, "%d\n", mddev->raid_disks);
}
@@ -2247,7 +2297,11 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
if (mddev->pers)
rv = update_raid_disks(mddev, n);
- else
+ else if (mddev->reshape_position != MaxSector) {
+ int olddisks = mddev->raid_disks - mddev->delta_disks;
+ mddev->delta_disks = n - olddisks;
+ mddev->raid_disks = n;
+ } else
mddev->raid_disks = n;
return rv ? rv : len;
}
@@ -2257,6 +2311,10 @@ __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
static ssize_t
chunk_size_show(mddev_t *mddev, char *page)
{
+ if (mddev->reshape_position != MaxSector &&
+ mddev->chunk_size != mddev->new_chunk)
+ return sprintf(page, "%d (%d)\n", mddev->new_chunk,
+ mddev->chunk_size);
return sprintf(page, "%d\n", mddev->chunk_size);
}
@@ -2267,12 +2325,15 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
- if (mddev->pers)
- return -EBUSY;
if (!*buf || (*e && *e != '\n'))
return -EINVAL;
- mddev->chunk_size = n;
+ if (mddev->pers)
+ return -EBUSY;
+ else if (mddev->reshape_position != MaxSector)
+ mddev->new_chunk = n;
+ else
+ mddev->chunk_size = n;
return len;
}
static struct md_sysfs_entry md_chunk_size =
@@ -2637,8 +2698,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
minor = simple_strtoul(buf, &e, 10);
if (e==buf || (*e && *e != '\n') )
return -EINVAL;
- if (major >= sizeof(super_types)/sizeof(super_types[0]) ||
- super_types[major].name == NULL)
+ if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
return -ENOENT;
mddev->major_version = major;
mddev->minor_version = minor;
@@ -2859,6 +2919,37 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry md_suspend_hi =
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
+static ssize_t
+reshape_position_show(mddev_t *mddev, char *page)
+{
+ if (mddev->reshape_position != MaxSector)
+ return sprintf(page, "%llu\n",
+ (unsigned long long)mddev->reshape_position);
+ strcpy(page, "none\n");
+ return 5;
+}
+
+static ssize_t
+reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ char *e;
+ unsigned long long new = simple_strtoull(buf, &e, 10);
+ if (mddev->pers)
+ return -EBUSY;
+ if (buf == e || (*e && *e != '\n'))
+ return -EINVAL;
+ mddev->reshape_position = new;
+ mddev->delta_disks = 0;
+ mddev->new_level = mddev->level;
+ mddev->new_layout = mddev->layout;
+ mddev->new_chunk = mddev->chunk_size;
+ return len;
+}
+
+static struct md_sysfs_entry md_reshape_position =
+__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
+ reshape_position_store);
+
static struct attribute *md_default_attrs[] = {
&md_level.attr,
@@ -2871,6 +2962,7 @@ static struct attribute *md_default_attrs[] = {
&md_new_device.attr,
&md_safe_delay.attr,
&md_array_state.attr,
+ &md_reshape_position.attr,
NULL,
};
@@ -3012,6 +3104,7 @@ static int do_md_run(mddev_t * mddev)
struct gendisk *disk;
struct mdk_personality *pers;
char b[BDEVNAME_SIZE];
+ struct block_device *bdev;
if (list_empty(&mddev->disks))
/* cannot run an array with no devices.. */
@@ -3239,7 +3332,13 @@ static int do_md_run(mddev_t * mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
- mddev->changed = 1;
+ bdev = bdget_disk(mddev->gendisk, 0);
+ if (bdev) {
+ bd_set_size(bdev, mddev->array_size << 1);
+ blkdev_ioctl(bdev->bd_inode, NULL, BLKRRPART, 0);
+ bdput(bdev);
+ }
+
md_new_event(mddev);
kobject_uevent(&mddev->gendisk->kobj, KOBJ_CHANGE);
return 0;
@@ -3361,7 +3460,6 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->pers = NULL;
set_capacity(disk, 0);
- mddev->changed = 1;
if (mddev->ro)
mddev->ro = 0;
@@ -3409,6 +3507,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->size = 0;
mddev->raid_disks = 0;
mddev->recovery_cp = 0;
+ mddev->reshape_position = MaxSector;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4019,7 +4118,7 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
if (info->raid_disks == 0) {
/* just setting version number for superblock loading */
if (info->major_version < 0 ||
- info->major_version >= sizeof(super_types)/sizeof(super_types[0]) ||
+ info->major_version >= ARRAY_SIZE(super_types) ||
super_types[info->major_version].name == NULL) {
/* maybe try to auto-load a module? */
printk(KERN_INFO
@@ -4500,20 +4599,6 @@ static int md_release(struct inode *inode, struct file * file)
return 0;
}
-static int md_media_changed(struct gendisk *disk)
-{
- mddev_t *mddev = disk->private_data;
-
- return mddev->changed;
-}
-
-static int md_revalidate(struct gendisk *disk)
-{
- mddev_t *mddev = disk->private_data;
-
- mddev->changed = 0;
- return 0;
-}
static struct block_device_operations md_fops =
{
.owner = THIS_MODULE,
@@ -4521,8 +4606,6 @@ static struct block_device_operations md_fops =
.release = md_release,
.ioctl = md_ioctl,
.getgeo = md_getgeo,
- .media_changed = md_media_changed,
- .revalidate_disk= md_revalidate,
};
static int md_thread(void * arg)
@@ -4941,15 +5024,6 @@ static int md_seq_open(struct inode *inode, struct file *file)
return error;
}
-static int md_seq_release(struct inode *inode, struct file *file)
-{
- struct seq_file *m = file->private_data;
- struct mdstat_info *mi = m->private;
- m->private = NULL;
- kfree(mi);
- return seq_release(inode, file);
-}
-
static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
{
struct seq_file *m = filp->private_data;
@@ -4971,7 +5045,7 @@ static const struct file_operations md_seq_fops = {
.open = md_seq_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = md_seq_release,
+ .release = seq_release_private,
.poll = mdstat_poll,
};
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 97ee870b265..1b7130cad21 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2063,7 +2063,6 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
*/
mddev->array_size = sectors>>1;
set_capacity(mddev->gendisk, mddev->array_size << 1);
- mddev->changed = 1;
if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 8d59914f205..a72e70ad097 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -353,8 +353,8 @@ static int grow_stripes(raid5_conf_t *conf, int num)
struct kmem_cache *sc;
int devs = conf->raid_disks;
- sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev));
- sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev));
+ sprintf(conf->cache_name[0], "raid5-%s", mdname(conf->mddev));
+ sprintf(conf->cache_name[1], "raid5-%s-alt", mdname(conf->mddev));
conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev),
@@ -3864,7 +3864,6 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
sectors &= ~((sector_t)mddev->chunk_size/512 - 1);
mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1;
set_capacity(mddev->gendisk, mddev->array_size << 1);
- mddev->changed = 1;
if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) {
mddev->recovery_cp = mddev->size << 1;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -3999,7 +3998,6 @@ static void end_reshape(raid5_conf_t *conf)
conf->mddev->array_size = conf->mddev->size *
(conf->raid_disks - conf->max_degraded);
set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
- conf->mddev->changed = 1;
bdev = bdget_disk(conf->mddev->gendisk, 0);
if (bdev) {
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index b6c16704aaa..7385acfa1dd 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -501,9 +501,9 @@ void mmc_detect_change(struct mmc_host *host, unsigned long delay)
{
#ifdef CONFIG_MMC_DEBUG
unsigned long flags;
- spin_lock_irqsave(host->lock, flags);
+ spin_lock_irqsave(&host->lock, flags);
BUG_ON(host->removed);
- spin_unlock_irqrestore(host->lock, flags);
+ spin_unlock_irqrestore(&host->lock, flags);
#endif
mmc_schedule_delayed_work(&host->detect, delay);
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 3a03a74c060..637ae8f6879 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -1214,7 +1214,7 @@ e1000_remove(struct pci_dev *pdev)
int i;
#endif
- flush_scheduled_work();
+ cancel_work_sync(&adapter->reset_task);
e1000_release_manageability(adapter);
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index eed433d6056..f71dab34766 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -662,10 +662,10 @@ int phy_stop_interrupts(struct phy_device *phydev)
phy_error(phydev);
/*
- * Finish any pending work; we might have been scheduled
- * to be called from keventd ourselves, though.
+ * Finish any pending work; we might have been scheduled to be called
+ * from keventd ourselves, but cancel_work_sync() handles that.
*/
- run_scheduled_work(&phydev->phy_queue);
+ cancel_work_sync(&phydev->phy_queue);
free_irq(phydev->irq, phydev);
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index e5e901ecd80..923b9c725cc 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -3716,10 +3716,8 @@ static void tg3_reset_task(struct work_struct *work)
unsigned int restart_timer;
tg3_full_lock(tp, 0);
- tp->tg3_flags |= TG3_FLAG_IN_RESET_TASK;
if (!netif_running(tp->dev)) {
- tp->tg3_flags &= ~TG3_FLAG_IN_RESET_TASK;
tg3_full_unlock(tp);
return;
}
@@ -3750,8 +3748,6 @@ static void tg3_reset_task(struct work_struct *work)
mod_timer(&tp->timer, jiffies + 1);
out:
- tp->tg3_flags &= ~TG3_FLAG_IN_RESET_TASK;
-
tg3_full_unlock(tp);
}
@@ -7390,12 +7386,7 @@ static int tg3_close(struct net_device *dev)
{
struct tg3 *tp = netdev_priv(dev);
- /* Calling flush_scheduled_work() may deadlock because
- * linkwatch_event() may be on the workqueue and it will try to get
- * the rtnl_lock which we are holding.
- */
- while (tp->tg3_flags & TG3_FLAG_IN_RESET_TASK)
- msleep(1);
+ cancel_work_sync(&tp->reset_task);
netif_stop_queue(dev);
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 4d334cf5a24..bd9f4f428e5 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2228,7 +2228,7 @@ struct tg3 {
#define TG3_FLAG_JUMBO_RING_ENABLE 0x00800000
#define TG3_FLAG_10_100_ONLY 0x01000000
#define TG3_FLAG_PAUSE_AUTONEG 0x02000000
-#define TG3_FLAG_IN_RESET_TASK 0x04000000
+
#define TG3_FLAG_40BIT_DMA_BUG 0x08000000
#define TG3_FLAG_BROKEN_CHECKSUMS 0x10000000
#define TG3_FLAG_SUPPORT_MSI 0x20000000
diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c
index 66e7bc98579..1d8a2f6bb8e 100644
--- a/drivers/spi/atmel_spi.c
+++ b/drivers/spi/atmel_spi.c
@@ -22,10 +22,7 @@
#include <asm/io.h>
#include <asm/arch/board.h>
#include <asm/arch/gpio.h>
-
-#ifdef CONFIG_ARCH_AT91
#include <asm/arch/cpu.h>
-#endif
#include "atmel_spi.h"
@@ -552,10 +549,8 @@ static int __init atmel_spi_probe(struct platform_device *pdev)
goto out_free_buffer;
as->irq = irq;
as->clk = clk;
-#ifdef CONFIG_ARCH_AT91
if (!cpu_is_at91rm9200())
as->new_1 = 1;
-#endif
ret = request_irq(irq, atmel_spi_interrupt, 0,
pdev->dev.bus_id, master);
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index b082d95bbba..11e9b15ca45 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -1033,7 +1033,7 @@ static int usbatm_do_heavy_init(void *arg)
static int usbatm_heavy_init(struct usbatm_data *instance)
{
- int ret = kernel_thread(usbatm_do_heavy_init, instance, CLONE_KERNEL);
+ int ret = kernel_thread(usbatm_do_heavy_init, instance, CLONE_FS | CLONE_FILES);
if (ret < 0) {
usb_err(instance, "%s: failed to create kernel_thread (%d)!\n", __func__, ret);
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 1132ba5ff39..9a256d2ff9d 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -1348,6 +1348,20 @@ config FB_VOODOO1
Please read the <file:Documentation/fb/README-sstfb.txt> for supported
options and other important info support.
+config FB_VT8623
+ tristate "VIA VT8623 support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_TILEBLITTING
+ select FB_SVGALIB
+ select VGASTATE
+ select FONT_8x16 if FRAMEBUFFER_CONSOLE
+ ---help---
+ Driver for CastleRock integrated graphics core in the
+ VIA VT8623 [Apollo CLE266] chipset.
+
config FB_CYBLA
tristate "Cyberblade/i1 support"
depends on FB && PCI && X86_32 && !64BIT
@@ -1401,6 +1415,20 @@ config FB_TRIDENT_ACCEL
This will compile the Trident frame buffer device with
acceleration functions.
+config FB_ARK
+ tristate "ARK 2000PV support"
+ depends on FB && PCI
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ select FB_TILEBLITTING
+ select FB_SVGALIB
+ select VGASTATE
+ select FONT_8x16 if FRAMEBUFFER_CONSOLE
+ ---help---
+ Driver for PCI graphics boards with ARK 2000PV chip
+ and ICS 5342 RAMDAC.
+
config FB_PM3
tristate "Permedia3 support"
depends on FB && PCI && BROKEN
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index a916c204274..0b70567458f 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -54,10 +54,12 @@ obj-$(CONFIG_FB_VALKYRIE) += valkyriefb.o
obj-$(CONFIG_FB_CT65550) += chipsfb.o
obj-$(CONFIG_FB_IMSTT) += imsttfb.o
obj-$(CONFIG_FB_FM2) += fm2fb.o
+obj-$(CONFIG_FB_VT8623) += vt8623fb.o
obj-$(CONFIG_FB_CYBLA) += cyblafb.o
obj-$(CONFIG_FB_TRIDENT) += tridentfb.o
obj-$(CONFIG_FB_LE80578) += vermilion/
obj-$(CONFIG_FB_S3) += s3fb.o
+obj-$(CONFIG_FB_ARK) += arkfb.o
obj-$(CONFIG_FB_STI) += stifb.o
obj-$(CONFIG_FB_FFB) += ffb.o sbuslib.o
obj-$(CONFIG_FB_CG6) += cg6.o sbuslib.o
diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c
new file mode 100644
index 00000000000..ba6fede5c46
--- /dev/null
+++ b/drivers/video/arkfb.c
@@ -0,0 +1,1200 @@
+/*
+ * linux/drivers/video/arkfb.c -- Frame buffer device driver for ARK 2000PV
+ * with ICS 5342 dac (it is easy to add support for different dacs).
+ *
+ * Copyright (c) 2007 Ondrej Zajicek <santiago@crfreenet.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ *
+ * Code is based on s3fb
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/svga.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/console.h> /* Why should fb driver call console functions? because acquire_console_sem() */
+#include <video/vga.h>
+
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
+#endif
+
+struct arkfb_info {
+ int mclk_freq;
+ int mtrr_reg;
+
+ struct dac_info *dac;
+ struct vgastate state;
+ struct mutex open_lock;
+ unsigned int ref_count;
+ u32 pseudo_palette[16];
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+static const struct svga_fb_format arkfb_formats[] = {
+ { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP4, FB_VISUAL_PSEUDOCOLOR, 8, 8},
+ { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 16},
+ { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1,
+ FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 8, 16},
+ { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 8},
+ {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4},
+ {16, {11, 5, 0}, {5, 6, 0}, {0, 5, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4},
+ {24, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 8, 8},
+ {32, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 2},
+ SVGA_FORMAT_END
+};
+
+
+/* CRT timing register sets */
+
+static const struct vga_regset ark_h_total_regs[] = {{0x00, 0, 7}, {0x41, 7, 7}, VGA_REGSET_END};
+static const struct vga_regset ark_h_display_regs[] = {{0x01, 0, 7}, {0x41, 6, 6}, VGA_REGSET_END};
+static const struct vga_regset ark_h_blank_start_regs[] = {{0x02, 0, 7}, {0x41, 5, 5}, VGA_REGSET_END};
+static const struct vga_regset ark_h_blank_end_regs[] = {{0x03, 0, 4}, {0x05, 7, 7 }, VGA_REGSET_END};
+static const struct vga_regset ark_h_sync_start_regs[] = {{0x04, 0, 7}, {0x41, 4, 4}, VGA_REGSET_END};
+static const struct vga_regset ark_h_sync_end_regs[] = {{0x05, 0, 4}, VGA_REGSET_END};
+
+static const struct vga_regset ark_v_total_regs[] = {{0x06, 0, 7}, {0x07, 0, 0}, {0x07, 5, 5}, {0x40, 7, 7}, VGA_REGSET_END};
+static const struct vga_regset ark_v_display_regs[] = {{0x12, 0, 7}, {0x07, 1, 1}, {0x07, 6, 6}, {0x40, 6, 6}, VGA_REGSET_END};
+static const struct vga_regset ark_v_blank_start_regs[] = {{0x15, 0, 7}, {0x07, 3, 3}, {0x09, 5, 5}, {0x40, 5, 5}, VGA_REGSET_END};
+// const struct vga_regset ark_v_blank_end_regs[] = {{0x16, 0, 6}, VGA_REGSET_END};
+static const struct vga_regset ark_v_blank_end_regs[] = {{0x16, 0, 7}, VGA_REGSET_END};
+static const struct vga_regset ark_v_sync_start_regs[] = {{0x10, 0, 7}, {0x07, 2, 2}, {0x07, 7, 7}, {0x40, 4, 4}, VGA_REGSET_END};
+static const struct vga_regset ark_v_sync_end_regs[] = {{0x11, 0, 3}, VGA_REGSET_END};
+
+static const struct vga_regset ark_line_compare_regs[] = {{0x18, 0, 7}, {0x07, 4, 4}, {0x09, 6, 6}, VGA_REGSET_END};
+static const struct vga_regset ark_start_address_regs[] = {{0x0d, 0, 7}, {0x0c, 0, 7}, {0x40, 0, 2}, VGA_REGSET_END};
+static const struct vga_regset ark_offset_regs[] = {{0x13, 0, 7}, {0x41, 3, 3}, VGA_REGSET_END};
+
+static const struct svga_timing_regs ark_timing_regs = {
+ ark_h_total_regs, ark_h_display_regs, ark_h_blank_start_regs,
+ ark_h_blank_end_regs, ark_h_sync_start_regs, ark_h_sync_end_regs,
+ ark_v_total_regs, ark_v_display_regs, ark_v_blank_start_regs,
+ ark_v_blank_end_regs, ark_v_sync_start_regs, ark_v_sync_end_regs,
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* Module parameters */
+
+static char *mode = "640x480-8@60";
+
+#ifdef CONFIG_MTRR
+static int mtrr = 1;
+#endif
+
+MODULE_AUTHOR("(c) 2007 Ondrej Zajicek <santiago@crfreenet.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("fbdev driver for ARK 2000PV");
+
+module_param(mode, charp, 0444);
+MODULE_PARM_DESC(mode, "Default video mode ('640x480-8@60', etc)");
+
+#ifdef CONFIG_MTRR
+module_param(mtrr, int, 0444);
+MODULE_PARM_DESC(mtrr, "Enable write-combining with MTRR (1=enable, 0=disable, default=1)");
+#endif
+
+static int threshold = 4;
+
+module_param(threshold, int, 0644);
+MODULE_PARM_DESC(threshold, "FIFO threshold");
+
+
+/* ------------------------------------------------------------------------- */
+
+
+static void arkfb_settile(struct fb_info *info, struct fb_tilemap *map)
+{
+ const u8 *font = map->data;
+ u8 __iomem *fb = (u8 __iomem *)info->screen_base;
+ int i, c;
+
+ if ((map->width != 8) || (map->height != 16) ||
+ (map->depth != 1) || (map->length != 256)) {
+ printk(KERN_ERR "fb%d: unsupported font parameters: width %d, "
+ "height %d, depth %d, length %d\n", info->node,
+ map->width, map->height, map->depth, map->length);
+ return;
+ }
+
+ fb += 2;
+ for (c = 0; c < map->length; c++) {
+ for (i = 0; i < map->height; i++) {
+ fb_writeb(font[i], &fb[i * 4]);
+ fb_writeb(font[i], &fb[i * 4 + (128 * 8)]);
+ }
+ fb += 128;
+
+ if ((c % 8) == 7)
+ fb += 128*8;
+
+ font += map->height;
+ }
+}
+
+static struct fb_tile_ops arkfb_tile_ops = {
+ .fb_settile = arkfb_settile,
+ .fb_tilecopy = svga_tilecopy,
+ .fb_tilefill = svga_tilefill,
+ .fb_tileblit = svga_tileblit,
+ .fb_tilecursor = svga_tilecursor,
+ .fb_get_tilemax = svga_get_tilemax,
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* image data is MSB-first, fb structure is MSB-first too */
+static inline u32 expand_color(u32 c)
+{
+ return ((c & 1) | ((c & 2) << 7) | ((c & 4) << 14) | ((c & 8) << 21)) * 0xFF;
+}
+
+/* arkfb_iplan_imageblit silently assumes that almost everything is 8-pixel aligned */
+static void arkfb_iplan_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ u32 fg = expand_color(image->fg_color);
+ u32 bg = expand_color(image->bg_color);
+ const u8 *src1, *src;
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ u32 val;
+ int x, y;
+
+ src1 = image->data;
+ dst1 = info->screen_base + (image->dy * info->fix.line_length)
+ + ((image->dx / 8) * 4);
+
+ for (y = 0; y < image->height; y++) {
+ src = src1;
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < image->width; x += 8) {
+ val = *(src++) * 0x01010101;
+ val = (val & fg) | (~val & bg);
+ fb_writel(val, dst++);
+ }
+ src1 += image->width / 8;
+ dst1 += info->fix.line_length;
+ }
+
+}
+
+/* arkfb_iplan_fillrect silently assumes that almost everything is 8-pixel aligned */
+static void arkfb_iplan_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ u32 fg = expand_color(rect->color);
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ int x, y;
+
+ dst1 = info->screen_base + (rect->dy * info->fix.line_length)
+ + ((rect->dx / 8) * 4);
+
+ for (y = 0; y < rect->height; y++) {
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < rect->width; x += 8) {
+ fb_writel(fg, dst++);
+ }
+ dst1 += info->fix.line_length;
+ }
+
+}
+
+
+/* image data is MSB-first, fb structure is high-nibble-in-low-byte-first */
+static inline u32 expand_pixel(u32 c)
+{
+ return (((c & 1) << 24) | ((c & 2) << 27) | ((c & 4) << 14) | ((c & 8) << 17) |
+ ((c & 16) << 4) | ((c & 32) << 7) | ((c & 64) >> 6) | ((c & 128) >> 3)) * 0xF;
+}
+
+/* arkfb_cfb4_imageblit silently assumes that almost everything is 8-pixel aligned */
+static void arkfb_cfb4_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ u32 fg = image->fg_color * 0x11111111;
+ u32 bg = image->bg_color * 0x11111111;
+ const u8 *src1, *src;
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ u32 val;
+ int x, y;
+
+ src1 = image->data;
+ dst1 = info->screen_base + (image->dy * info->fix.line_length)
+ + ((image->dx / 8) * 4);
+
+ for (y = 0; y < image->height; y++) {
+ src = src1;
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < image->width; x += 8) {
+ val = expand_pixel(*(src++));
+ val = (val & fg) | (~val & bg);
+ fb_writel(val, dst++);
+ }
+ src1 += image->width / 8;
+ dst1 += info->fix.line_length;
+ }
+
+}
+
+static void arkfb_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ if ((info->var.bits_per_pixel == 4) && (image->depth == 1)
+ && ((image->width % 8) == 0) && ((image->dx % 8) == 0)) {
+ if (info->fix.type == FB_TYPE_INTERLEAVED_PLANES)
+ arkfb_iplan_imageblit(info, image);
+ else
+ arkfb_cfb4_imageblit(info, image);
+ } else
+ cfb_imageblit(info, image);
+}
+
+static void arkfb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ if ((info->var.bits_per_pixel == 4)
+ && ((rect->width % 8) == 0) && ((rect->dx % 8) == 0)
+ && (info->fix.type == FB_TYPE_INTERLEAVED_PLANES))
+ arkfb_iplan_fillrect(info, rect);
+ else
+ cfb_fillrect(info, rect);
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+enum
+{
+ DAC_PSEUDO8_8,
+ DAC_RGB1555_8,
+ DAC_RGB0565_8,
+ DAC_RGB0888_8,
+ DAC_RGB8888_8,
+ DAC_PSEUDO8_16,
+ DAC_RGB1555_16,
+ DAC_RGB0565_16,
+ DAC_RGB0888_16,
+ DAC_RGB8888_16,
+ DAC_MAX
+};
+
+struct dac_ops {
+ int (*dac_get_mode)(struct dac_info *info);
+ int (*dac_set_mode)(struct dac_info *info, int mode);
+ int (*dac_get_freq)(struct dac_info *info, int channel);
+ int (*dac_set_freq)(struct dac_info *info, int channel, u32 freq);
+ void (*dac_release)(struct dac_info *info);
+};
+
+typedef void (*dac_read_regs_t)(void *data, u8 *code, int count);
+typedef void (*dac_write_regs_t)(void *data, u8 *code, int count);
+
+struct dac_info
+{
+ struct dac_ops *dacops;
+ dac_read_regs_t dac_read_regs;
+ dac_write_regs_t dac_write_regs;
+ void *data;
+};
+
+
+static inline u8 dac_read_reg(struct dac_info *info, u8 reg)
+{
+ u8 code[2] = {reg, 0};
+ info->dac_read_regs(info->data, code, 1);
+ return code[1];
+}
+
+static inline void dac_read_regs(struct dac_info *info, u8 *code, int count)
+{
+ info->dac_read_regs(info->data, code, count);
+}
+
+static inline void dac_write_reg(struct dac_info *info, u8 reg, u8 val)
+{
+ u8 code[2] = {reg, val};
+ info->dac_write_regs(info->data, code, 1);
+}
+
+static inline void dac_write_regs(struct dac_info *info, u8 *code, int count)
+{
+ info->dac_write_regs(info->data, code, count);
+}
+
+static inline int dac_set_mode(struct dac_info *info, int mode)
+{
+ return info->dacops->dac_set_mode(info, mode);
+}
+
+static inline int dac_set_freq(struct dac_info *info, int channel, u32 freq)
+{
+ return info->dacops->dac_set_freq(info, channel, freq);
+}
+
+static inline void dac_release(struct dac_info *info)
+{
+ info->dacops->dac_release(info);
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* ICS5342 DAC */
+
+struct ics5342_info
+{
+ struct dac_info dac;
+ u8 mode;
+};
+
+#define DAC_PAR(info) ((struct ics5342_info *) info)
+
+/* LSB is set to distinguish unused slots */
+static const u8 ics5342_mode_table[DAC_MAX] = {
+ [DAC_PSEUDO8_8] = 0x01, [DAC_RGB1555_8] = 0x21, [DAC_RGB0565_8] = 0x61,
+ [DAC_RGB0888_8] = 0x41, [DAC_PSEUDO8_16] = 0x11, [DAC_RGB1555_16] = 0x31,
+ [DAC_RGB0565_16] = 0x51, [DAC_RGB0888_16] = 0x91, [DAC_RGB8888_16] = 0x71
+};
+
+static int ics5342_set_mode(struct dac_info *info, int mode)
+{
+ u8 code;
+
+ if (mode >= DAC_MAX)
+ return -EINVAL;
+
+ code = ics5342_mode_table[mode];
+
+ if (! code)
+ return -EINVAL;
+
+ dac_write_reg(info, 6, code & 0xF0);
+ DAC_PAR(info)->mode = mode;
+
+ return 0;
+}
+
+static const struct svga_pll ics5342_pll = {3, 129, 3, 33, 0, 3,
+ 60000, 250000, 14318};
+
+/* pd4 - allow only posdivider 4 (r=2) */
+static const struct svga_pll ics5342_pll_pd4 = {3, 129, 3, 33, 2, 2,
+ 60000, 335000, 14318};
+
+/* 270 MHz should be upper bound for VCO clock according to specs,
+ but that is too restrictive in pd4 case */
+
+static int ics5342_set_freq(struct dac_info *info, int channel, u32 freq)
+{
+ u16 m, n, r;
+
+ /* only postdivider 4 (r=2) is valid in mode DAC_PSEUDO8_16 */
+ int rv = svga_compute_pll((DAC_PAR(info)->mode == DAC_PSEUDO8_16)
+ ? &ics5342_pll_pd4 : &ics5342_pll,
+ freq, &m, &n, &r, 0);
+
+ if (rv < 0) {
+ return -EINVAL;
+ } else {
+ u8 code[6] = {4, 3, 5, m-2, 5, (n-2) | (r << 5)};
+ dac_write_regs(info, code, 3);
+ return 0;
+ }
+}
+
+static void ics5342_release(struct dac_info *info)
+{
+ ics5342_set_mode(info, DAC_PSEUDO8_8);
+ kfree(info);
+}
+
+static struct dac_ops ics5342_ops = {
+ .dac_set_mode = ics5342_set_mode,
+ .dac_set_freq = ics5342_set_freq,
+ .dac_release = ics5342_release
+};
+
+
+static struct dac_info * ics5342_init(dac_read_regs_t drr, dac_write_regs_t dwr, void *data)
+{
+ struct dac_info *info = kzalloc(sizeof(struct ics5342_info), GFP_KERNEL);
+
+ if (! info)
+ return NULL;
+
+ info->dacops = &ics5342_ops;
+ info->dac_read_regs = drr;
+ info->dac_write_regs = dwr;
+ info->data = data;
+ DAC_PAR(info)->mode = DAC_PSEUDO8_8; /* estimation */
+ return info;
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+static unsigned short dac_regs[4] = {0x3c8, 0x3c9, 0x3c6, 0x3c7};
+
+static void ark_dac_read_regs(void *data, u8 *code, int count)
+{
+ u8 regval = vga_rseq(NULL, 0x1C);
+
+ while (count != 0)
+ {
+ vga_wseq(NULL, 0x1C, regval | (code[0] & 4) ? 0x80 : 0);
+ code[1] = vga_r(NULL, dac_regs[code[0] & 3]);
+ count--;
+ code += 2;
+ }
+
+ vga_wseq(NULL, 0x1C, regval);
+}
+
+static void ark_dac_write_regs(void *data, u8 *code, int count)
+{
+ u8 regval = vga_rseq(NULL, 0x1C);
+
+ while (count != 0)
+ {
+ vga_wseq(NULL, 0x1C, regval | (code[0] & 4) ? 0x80 : 0);
+ vga_w(NULL, dac_regs[code[0] & 3], code[1]);
+ count--;
+ code += 2;
+ }
+
+ vga_wseq(NULL, 0x1C, regval);
+}
+
+
+static void ark_set_pixclock(struct fb_info *info, u32 pixclock)
+{
+ struct arkfb_info *par = info->par;
+ u8 regval;
+
+ int rv = dac_set_freq(par->dac, 0, 1000000000 / pixclock);
+ if (rv < 0) {
+ printk(KERN_ERR "fb%d: cannot set requested pixclock, keeping old value\n", info->node);
+ return;
+ }
+
+ /* Set VGA misc register */
+ regval = vga_r(NULL, VGA_MIS_R);
+ vga_w(NULL, VGA_MIS_W, regval | VGA_MIS_ENB_PLL_LOAD);
+}
+
+
+/* Open framebuffer */
+
+static int arkfb_open(struct fb_info *info, int user)
+{
+ struct arkfb_info *par = info->par;
+
+ mutex_lock(&(par->open_lock));
+ if (par->ref_count == 0) {
+ memset(&(par->state), 0, sizeof(struct vgastate));
+ par->state.flags = VGA_SAVE_MODE | VGA_SAVE_FONTS | VGA_SAVE_CMAP;
+ par->state.num_crtc = 0x60;
+ par->state.num_seq = 0x30;
+ save_vga(&(par->state));
+ }
+
+ par->ref_count++;
+ mutex_unlock(&(par->open_lock));
+
+ return 0;
+}
+
+/* Close framebuffer */
+
+static int arkfb_release(struct fb_info *info, int user)
+{
+ struct arkfb_info *par = info->par;
+
+ mutex_lock(&(par->open_lock));
+ if (par->ref_count == 0) {
+ mutex_unlock(&(par->open_lock));
+ return -EINVAL;
+ }
+
+ if (par->ref_count == 1) {
+ restore_vga(&(par->state));
+ dac_set_mode(par->dac, DAC_PSEUDO8_8);
+ }
+
+ par->ref_count--;
+ mutex_unlock(&(par->open_lock));
+
+ return 0;
+}
+
+/* Validate passed in var */
+
+static int arkfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ int rv, mem, step;
+
+ /* Find appropriate format */
+ rv = svga_match_format (arkfb_formats, var, NULL);
+ if (rv < 0)
+ {
+ printk(KERN_ERR "fb%d: unsupported mode requested\n", info->node);
+ return rv;
+ }
+
+ /* Do not allow to have real resoulution larger than virtual */
+ if (var->xres > var->xres_virtual)
+ var->xres_virtual = var->xres;
+
+ if (var->yres > var->yres_virtual)
+ var->yres_virtual = var->yres;
+
+ /* Round up xres_virtual to have proper alignment of lines */
+ step = arkfb_formats[rv].xresstep - 1;
+ var->xres_virtual = (var->xres_virtual+step) & ~step;
+
+
+ /* Check whether have enough memory */
+ mem = ((var->bits_per_pixel * var->xres_virtual) >> 3) * var->yres_virtual;
+ if (mem > info->screen_size)
+ {
+ printk(KERN_ERR "fb%d: not enough framebuffer memory (%d kB requested , %d kB available)\n", info->node, mem >> 10, (unsigned int) (info->screen_size >> 10));
+ return -EINVAL;
+ }
+
+ rv = svga_check_timings (&ark_timing_regs, var, info->node);
+ if (rv < 0)
+ {
+ printk(KERN_ERR "fb%d: invalid timings requested\n", info->node);
+ return rv;
+ }
+
+ /* Interlaced mode is broken */
+ if (var->vmode & FB_VMODE_INTERLACED)
+ return -EINVAL;
+
+ return 0;
+}
+
+/* Set video mode from par */
+
+static int arkfb_set_par(struct fb_info *info)
+{
+ struct arkfb_info *par = info->par;
+ u32 value, mode, hmul, hdiv, offset_value, screen_size;
+ u32 bpp = info->var.bits_per_pixel;
+ u8 regval;
+
+ if (bpp != 0) {
+ info->fix.ypanstep = 1;
+ info->fix.line_length = (info->var.xres_virtual * bpp) / 8;
+
+ info->flags &= ~FBINFO_MISC_TILEBLITTING;
+ info->tileops = NULL;
+
+ /* in 4bpp supports 8p wide tiles only, any tiles otherwise */
+ info->pixmap.blit_x = (bpp == 4) ? (1 << (8 - 1)) : (~(u32)0);
+ info->pixmap.blit_y = ~(u32)0;
+
+ offset_value = (info->var.xres_virtual * bpp) / 64;
+ screen_size = info->var.yres_virtual * info->fix.line_length;
+ } else {
+ info->fix.ypanstep = 16;
+ info->fix.line_length = 0;
+
+ info->flags |= FBINFO_MISC_TILEBLITTING;
+ info->tileops = &arkfb_tile_ops;
+
+ /* supports 8x16 tiles only */
+ info->pixmap.blit_x = 1 << (8 - 1);
+ info->pixmap.blit_y = 1 << (16 - 1);
+
+ offset_value = info->var.xres_virtual / 16;
+ screen_size = (info->var.xres_virtual * info->var.yres_virtual) / 64;
+ }
+
+ info->var.xoffset = 0;
+ info->var.yoffset = 0;
+ info->var.activate = FB_ACTIVATE_NOW;
+
+ /* Unlock registers */
+ svga_wcrt_mask(0x11, 0x00, 0x80);
+
+ /* Blank screen and turn off sync */
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ svga_wcrt_mask(0x17, 0x00, 0x80);
+
+ /* Set default values */
+ svga_set_default_gfx_regs();
+ svga_set_default_atc_regs();
+ svga_set_default_seq_regs();
+ svga_set_default_crt_regs();
+ svga_wcrt_multi(ark_line_compare_regs, 0xFFFFFFFF);
+ svga_wcrt_multi(ark_start_address_regs, 0);
+
+ /* ARK specific initialization */
+ svga_wseq_mask(0x10, 0x1F, 0x1F); /* enable linear framebuffer and full memory access */
+ svga_wseq_mask(0x12, 0x03, 0x03); /* 4 MB linear framebuffer size */
+
+ vga_wseq(NULL, 0x13, info->fix.smem_start >> 16);
+ vga_wseq(NULL, 0x14, info->fix.smem_start >> 24);
+ vga_wseq(NULL, 0x15, 0);
+ vga_wseq(NULL, 0x16, 0);
+
+ /* Set the FIFO threshold register */
+ /* It is fascinating way to store 5-bit value in 8-bit register */
+ regval = 0x10 | ((threshold & 0x0E) >> 1) | (threshold & 0x01) << 7 | (threshold & 0x10) << 1;
+ vga_wseq(NULL, 0x18, regval);
+
+ /* Set the offset register */
+ pr_debug("fb%d: offset register : %d\n", info->node, offset_value);
+ svga_wcrt_multi(ark_offset_regs, offset_value);
+
+ /* fix for hi-res textmode */
+ svga_wcrt_mask(0x40, 0x08, 0x08);
+
+ if (info->var.vmode & FB_VMODE_DOUBLE)
+ svga_wcrt_mask(0x09, 0x80, 0x80);
+ else
+ svga_wcrt_mask(0x09, 0x00, 0x80);
+
+ if (info->var.vmode & FB_VMODE_INTERLACED)
+ svga_wcrt_mask(0x44, 0x04, 0x04);
+ else
+ svga_wcrt_mask(0x44, 0x00, 0x04);
+
+ hmul = 1;
+ hdiv = 1;
+ mode = svga_match_format(arkfb_formats, &(info->var), &(info->fix));
+
+ /* Set mode-specific register values */
+ switch (mode) {
+ case 0:
+ pr_debug("fb%d: text mode\n", info->node);
+ svga_set_textmode_vga_regs();
+
+ vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */
+ svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */
+ dac_set_mode(par->dac, DAC_PSEUDO8_8);
+
+ break;
+ case 1:
+ pr_debug("fb%d: 4 bit pseudocolor\n", info->node);
+ vga_wgfx(NULL, VGA_GFX_MODE, 0x40);
+
+ vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */
+ svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */
+ dac_set_mode(par->dac, DAC_PSEUDO8_8);
+ break;
+ case 2:
+ pr_debug("fb%d: 4 bit pseudocolor, planar\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x10); /* basic VGA mode */
+ svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */
+ dac_set_mode(par->dac, DAC_PSEUDO8_8);
+ break;
+ case 3:
+ pr_debug("fb%d: 8 bit pseudocolor\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x16); /* 8bpp accel mode */
+
+ if (info->var.pixclock > 20000) {
+ pr_debug("fb%d: not using multiplex\n", info->node);
+ svga_wcrt_mask(0x46, 0x00, 0x04); /* 8bit pixel path */
+ dac_set_mode(par->dac, DAC_PSEUDO8_8);
+ } else {
+ pr_debug("fb%d: using multiplex\n", info->node);
+ svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */
+ dac_set_mode(par->dac, DAC_PSEUDO8_16);
+ hdiv = 2;
+ }
+ break;
+ case 4:
+ pr_debug("fb%d: 5/5/5 truecolor\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x1A); /* 16bpp accel mode */
+ svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */
+ dac_set_mode(par->dac, DAC_RGB1555_16);
+ break;
+ case 5:
+ pr_debug("fb%d: 5/6/5 truecolor\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x1A); /* 16bpp accel mode */
+ svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */
+ dac_set_mode(par->dac, DAC_RGB0565_16);
+ break;
+ case 6:
+ pr_debug("fb%d: 8/8/8 truecolor\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x16); /* 8bpp accel mode ??? */
+ svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */
+ dac_set_mode(par->dac, DAC_RGB0888_16);
+ hmul = 3;
+ hdiv = 2;
+ break;
+ case 7:
+ pr_debug("fb%d: 8/8/8/8 truecolor\n", info->node);
+
+ vga_wseq(NULL, 0x11, 0x1E); /* 32bpp accel mode */
+ svga_wcrt_mask(0x46, 0x04, 0x04); /* 16bit pixel path */
+ dac_set_mode(par->dac, DAC_RGB8888_16);
+ hmul = 2;
+ break;
+ default:
+ printk(KERN_ERR "fb%d: unsupported mode - bug\n", info->node);
+ return -EINVAL;
+ }
+
+ ark_set_pixclock(info, (hdiv * info->var.pixclock) / hmul);
+ svga_set_timings(&ark_timing_regs, &(info->var), hmul, hdiv,
+ (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1,
+ (info->var.vmode & FB_VMODE_INTERLACED) ? 2 : 1,
+ hmul, info->node);
+
+ /* Set interlaced mode start/end register */
+ value = info->var.xres + info->var.left_margin + info->var.right_margin + info->var.hsync_len;
+ value = ((value * hmul / hdiv) / 8) - 5;
+ vga_wcrt(NULL, 0x42, (value + 1) / 2);
+
+ memset_io(info->screen_base, 0x00, screen_size);
+ /* Device and screen back on */
+ svga_wcrt_mask(0x17, 0x80, 0x80);
+ svga_wseq_mask(0x01, 0x00, 0x20);
+
+ return 0;
+}
+
+/* Set a colour register */
+
+static int arkfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+ u_int transp, struct fb_info *fb)
+{
+ switch (fb->var.bits_per_pixel) {
+ case 0:
+ case 4:
+ if (regno >= 16)
+ return -EINVAL;
+
+ if ((fb->var.bits_per_pixel == 4) &&
+ (fb->var.nonstd == 0)) {
+ outb(0xF0, VGA_PEL_MSK);
+ outb(regno*16, VGA_PEL_IW);
+ } else {
+ outb(0x0F, VGA_PEL_MSK);
+ outb(regno, VGA_PEL_IW);
+ }
+ outb(red >> 10, VGA_PEL_D);
+ outb(green >> 10, VGA_PEL_D);
+ outb(blue >> 10, VGA_PEL_D);
+ break;
+ case 8:
+ if (regno >= 256)
+ return -EINVAL;
+
+ outb(0xFF, VGA_PEL_MSK);
+ outb(regno, VGA_PEL_IW);
+ outb(red >> 10, VGA_PEL_D);
+ outb(green >> 10, VGA_PEL_D);
+ outb(blue >> 10, VGA_PEL_D);
+ break;
+ case 16:
+ if (regno >= 16)
+ return 0;
+
+ if (fb->var.green.length == 5)
+ ((u32*)fb->pseudo_palette)[regno] = ((red & 0xF800) >> 1) |
+ ((green & 0xF800) >> 6) | ((blue & 0xF800) >> 11);
+ else if (fb->var.green.length == 6)
+ ((u32*)fb->pseudo_palette)[regno] = (red & 0xF800) |
+ ((green & 0xFC00) >> 5) | ((blue & 0xF800) >> 11);
+ else
+ return -EINVAL;
+ break;
+ case 24:
+ case 32:
+ if (regno >= 16)
+ return 0;
+
+ ((u32*)fb->pseudo_palette)[regno] = ((red & 0xFF00) << 8) |
+ (green & 0xFF00) | ((blue & 0xFF00) >> 8);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Set the display blanking state */
+
+static int arkfb_blank(int blank_mode, struct fb_info *info)
+{
+ switch (blank_mode) {
+ case FB_BLANK_UNBLANK:
+ pr_debug("fb%d: unblank\n", info->node);
+ svga_wseq_mask(0x01, 0x00, 0x20);
+ svga_wcrt_mask(0x17, 0x80, 0x80);
+ break;
+ case FB_BLANK_NORMAL:
+ pr_debug("fb%d: blank\n", info->node);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ svga_wcrt_mask(0x17, 0x80, 0x80);
+ break;
+ case FB_BLANK_POWERDOWN:
+ case FB_BLANK_HSYNC_SUSPEND:
+ case FB_BLANK_VSYNC_SUSPEND:
+ pr_debug("fb%d: sync down\n", info->node);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ svga_wcrt_mask(0x17, 0x00, 0x80);
+ break;
+ }
+ return 0;
+}
+
+
+/* Pan the display */
+
+static int arkfb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ unsigned int offset;
+
+ /* Calculate the offset */
+ if (var->bits_per_pixel == 0) {
+ offset = (var->yoffset / 16) * (var->xres_virtual / 2) + (var->xoffset / 2);
+ offset = offset >> 2;
+ } else {
+ offset = (var->yoffset * info->fix.line_length) +
+ (var->xoffset * var->bits_per_pixel / 8);
+ offset = offset >> ((var->bits_per_pixel == 4) ? 2 : 3);
+ }
+
+ /* Set the offset */
+ svga_wcrt_multi(ark_start_address_regs, offset);
+
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* Frame buffer operations */
+
+static struct fb_ops arkfb_ops = {
+ .owner = THIS_MODULE,
+ .fb_open = arkfb_open,
+ .fb_release = arkfb_release,
+ .fb_check_var = arkfb_check_var,
+ .fb_set_par = arkfb_set_par,
+ .fb_setcolreg = arkfb_setcolreg,
+ .fb_blank = arkfb_blank,
+ .fb_pan_display = arkfb_pan_display,
+ .fb_fillrect = arkfb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = arkfb_imageblit,
+ .fb_get_caps = svga_get_caps,
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* PCI probe */
+static int __devinit ark_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct fb_info *info;
+ struct arkfb_info *par;
+ int rc;
+ u8 regval;
+
+ /* Ignore secondary VGA device because there is no VGA arbitration */
+ if (! svga_primary_device(dev)) {
+ dev_info(&(dev->dev), "ignoring secondary device\n");
+ return -ENODEV;
+ }
+
+ /* Allocate and fill driver data structure */
+ info = framebuffer_alloc(sizeof(struct arkfb_info), NULL);
+ if (! info) {
+ dev_err(&(dev->dev), "cannot allocate memory\n");
+ return -ENOMEM;
+ }
+
+ par = info->par;
+ mutex_init(&par->open_lock);
+
+ info->flags = FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_YPAN;
+ info->fbops = &arkfb_ops;
+
+ /* Prepare PCI device */
+ rc = pci_enable_device(dev);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot enable PCI device\n");
+ goto err_enable_device;
+ }
+
+ rc = pci_request_regions(dev, "arkfb");
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot reserve framebuffer region\n");
+ goto err_request_regions;
+ }
+
+ par->dac = ics5342_init(ark_dac_read_regs, ark_dac_write_regs, info);
+ if (! par->dac) {
+ rc = -ENOMEM;
+ dev_err(&(dev->dev), "RAMDAC initialization failed\n");
+ goto err_dac;
+ }
+
+ info->fix.smem_start = pci_resource_start(dev, 0);
+ info->fix.smem_len = pci_resource_len(dev, 0);
+
+ /* Map physical IO memory address into kernel space */
+ info->screen_base = pci_iomap(dev, 0, 0);
+ if (! info->screen_base) {
+ rc = -ENOMEM;
+ dev_err(&(dev->dev), "iomap for framebuffer failed\n");
+ goto err_iomap;
+ }
+
+ /* FIXME get memsize */
+ regval = vga_rseq(NULL, 0x10);
+ info->screen_size = (1 << (regval >> 6)) << 20;
+ info->fix.smem_len = info->screen_size;
+
+ strcpy(info->fix.id, "ARK 2000PV");
+ info->fix.mmio_start = 0;
+ info->fix.mmio_len = 0;
+ info->fix.type = FB_TYPE_PACKED_PIXELS;
+ info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+ info->fix.ypanstep = 0;
+ info->fix.accel = FB_ACCEL_NONE;
+ info->pseudo_palette = (void*) (par->pseudo_palette);
+
+ /* Prepare startup mode */
+ rc = fb_find_mode(&(info->var), info, mode, NULL, 0, NULL, 8);
+ if (! ((rc == 1) || (rc == 2))) {
+ rc = -EINVAL;
+ dev_err(&(dev->dev), "mode %s not found\n", mode);
+ goto err_find_mode;
+ }
+
+ rc = fb_alloc_cmap(&info->cmap, 256, 0);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot allocate colormap\n");
+ goto err_alloc_cmap;
+ }
+
+ rc = register_framebuffer(info);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot register framebugger\n");
+ goto err_reg_fb;
+ }
+
+ printk(KERN_INFO "fb%d: %s on %s, %d MB RAM\n", info->node, info->fix.id,
+ pci_name(dev), info->fix.smem_len >> 20);
+
+ /* Record a reference to the driver data */
+ pci_set_drvdata(dev, info);
+
+#ifdef CONFIG_MTRR
+ if (mtrr) {
+ par->mtrr_reg = -1;
+ par->mtrr_reg = mtrr_add(info->fix.smem_start, info->fix.smem_len, MTRR_TYPE_WRCOMB, 1);
+ }
+#endif
+
+ return 0;
+
+ /* Error handling */
+err_reg_fb:
+ fb_dealloc_cmap(&info->cmap);
+err_alloc_cmap:
+err_find_mode:
+ pci_iounmap(dev, info->screen_base);
+err_iomap:
+ dac_release(par->dac);
+err_dac:
+ pci_release_regions(dev);
+err_request_regions:
+/* pci_disable_device(dev); */
+err_enable_device:
+ framebuffer_release(info);
+ return rc;
+}
+
+/* PCI remove */
+
+static void __devexit ark_pci_remove(struct pci_dev *dev)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct arkfb_info *par = info->par;
+
+ if (info) {
+#ifdef CONFIG_MTRR
+ if (par->mtrr_reg >= 0) {
+ mtrr_del(par->mtrr_reg, 0, 0);
+ par->mtrr_reg = -1;
+ }
+#endif
+
+ dac_release(par->dac);
+ unregister_framebuffer(info);
+ fb_dealloc_cmap(&info->cmap);
+
+ pci_iounmap(dev, info->screen_base);
+ pci_release_regions(dev);
+/* pci_disable_device(dev); */
+
+ pci_set_drvdata(dev, NULL);
+ framebuffer_release(info);
+ }
+}
+
+
+#ifdef CONFIG_PM
+/* PCI suspend */
+
+static int ark_pci_suspend (struct pci_dev* dev, pm_message_t state)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct arkfb_info *par = info->par;
+
+ dev_info(&(dev->dev), "suspend\n");
+
+ acquire_console_sem();
+ mutex_lock(&(par->open_lock));
+
+ if ((state.event == PM_EVENT_FREEZE) || (par->ref_count == 0)) {
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+ return 0;
+ }
+
+ fb_set_suspend(info, 1);
+
+ pci_save_state(dev);
+ pci_disable_device(dev);
+ pci_set_power_state(dev, pci_choose_state(dev, state));
+
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+
+ return 0;
+}
+
+
+/* PCI resume */
+
+static int ark_pci_resume (struct pci_dev* dev)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct arkfb_info *par = info->par;
+
+ dev_info(&(dev->dev), "resume\n");
+
+ acquire_console_sem();
+ mutex_lock(&(par->open_lock));
+
+ if (par->ref_count == 0) {
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+ return 0;
+ }
+
+ pci_set_power_state(dev, PCI_D0);
+ pci_restore_state(dev);
+
+ if (pci_enable_device(dev))
+ goto fail;
+
+ pci_set_master(dev);
+
+ arkfb_set_par(info);
+ fb_set_suspend(info, 0);
+
+ mutex_unlock(&(par->open_lock));
+fail:
+ release_console_sem();
+ return 0;
+}
+#else
+#define ark_pci_suspend NULL
+#define ark_pci_resume NULL
+#endif /* CONFIG_PM */
+
+/* List of boards that we are trying to support */
+
+static struct pci_device_id ark_devices[] __devinitdata = {
+ {PCI_DEVICE(0xEDD8, 0xA099)},
+ {0, 0, 0, 0, 0, 0, 0}
+};
+
+
+MODULE_DEVICE_TABLE(pci, ark_devices);
+
+static struct pci_driver arkfb_pci_driver = {
+ .name = "arkfb",
+ .id_table = ark_devices,
+ .probe = ark_pci_probe,
+ .remove = __devexit_p(ark_pci_remove),
+ .suspend = ark_pci_suspend,
+ .resume = ark_pci_resume,
+};
+
+/* Cleanup */
+
+static void __exit arkfb_cleanup(void)
+{
+ pr_debug("arkfb: cleaning up\n");
+ pci_unregister_driver(&arkfb_pci_driver);
+}
+
+/* Driver Initialisation */
+
+static int __init arkfb_init(void)
+{
+
+#ifndef MODULE
+ char *option = NULL;
+
+ if (fb_get_options("arkfb", &option))
+ return -ENODEV;
+
+ if (option && *option)
+ mode = option;
+#endif
+
+ pr_debug("arkfb: initializing\n");
+ return pci_register_driver(&arkfb_pci_driver);
+}
+
+module_init(arkfb_init);
+module_exit(arkfb_cleanup);
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 08d4e11d912..38c2e2558f5 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1236,6 +1236,10 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
#elif defined(__arm__) || defined(__sh__) || defined(__m32r__)
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+#elif defined(__avr32__)
+ vma->vm_page_prot = __pgprot((pgprot_val(vma->vm_page_prot)
+ & ~_PAGE_CACHABLE)
+ | (_PAGE_BUFFER | _PAGE_DIRTY));
#elif defined(__ia64__)
if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start))
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
diff --git a/drivers/video/nvidia/nv_hw.c b/drivers/video/nvidia/nv_hw.c
index f297c7b14a4..c627955aa12 100644
--- a/drivers/video/nvidia/nv_hw.c
+++ b/drivers/video/nvidia/nv_hw.c
@@ -149,8 +149,7 @@ static void nvGetClocks(struct nvidia_par *par, unsigned int *MClk,
pll = NV_RD32(par->PMC, 0x4024);
M = pll & 0xFF;
N = (pll >> 8) & 0xFF;
- if (((par->Chipset & 0xfff0) == 0x0290) ||
- ((par->Chipset & 0xfff0) == 0x0390)) {
+ if (((par->Chipset & 0xfff0) == 0x0290) || ((par->Chipset & 0xfff0) == 0x0390) || ((par->Chipset & 0xfff0) == 0x02E0)) {
MB = 1;
NB = 1;
} else {
@@ -963,6 +962,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
if (((par->Chipset & 0xfff0) == 0x0090) ||
((par->Chipset & 0xfff0) == 0x01D0) ||
+ ((par->Chipset & 0xfff0) == 0x02E0) ||
((par->Chipset & 0xfff0) == 0x0290))
regions = 15;
for(i = 0; i < regions; i++) {
@@ -1275,6 +1275,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
0x00100000);
break;
case 0x0090:
+ case 0x02E0:
case 0x0290:
NV_WR32(par->PRAMDAC, 0x0608,
NV_RD32(par->PRAMDAC, 0x0608) |
@@ -1352,6 +1353,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
} else {
if (((par->Chipset & 0xfff0) == 0x0090) ||
((par->Chipset & 0xfff0) == 0x01D0) ||
+ ((par->Chipset & 0xfff0) == 0x02E0) ||
((par->Chipset & 0xfff0) == 0x0290)) {
for (i = 0; i < 60; i++) {
NV_WR32(par->PGRAPH,
@@ -1403,6 +1405,7 @@ void NVLoadStateExt(struct nvidia_par *par, RIVA_HW_STATE * state)
} else {
if ((par->Chipset & 0xfff0) == 0x0090 ||
(par->Chipset & 0xfff0) == 0x01D0 ||
+ (par->Chipset & 0xfff0) == 0x02E0 ||
(par->Chipset & 0xfff0) == 0x0290) {
NV_WR32(par->PGRAPH, 0x0DF0,
NV_RD32(par->PFB, 0x0200));
diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c
index 7c36b5fe582..f85edf084da 100644
--- a/drivers/video/nvidia/nvidia.c
+++ b/drivers/video/nvidia/nvidia.c
@@ -1243,6 +1243,7 @@ static u32 __devinit nvidia_get_arch(struct fb_info *info)
case 0x0140: /* GeForce 6600 */
case 0x0160: /* GeForce 6200 */
case 0x01D0: /* GeForce 7200, 7300, 7400 */
+ case 0x02E0: /* GeForce 7300 GT */
case 0x0090: /* GeForce 7800 */
case 0x0210: /* GeForce 6800 */
case 0x0220: /* GeForce 6200 */
diff --git a/drivers/video/s3fb.c b/drivers/video/s3fb.c
index 756fafb41d7..d11735895a0 100644
--- a/drivers/video/s3fb.c
+++ b/drivers/video/s3fb.c
@@ -796,23 +796,6 @@ static int s3fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
return 0;
}
-/* Get capabilities of accelerator based on the mode */
-
-static void s3fb_get_caps(struct fb_info *info, struct fb_blit_caps *caps,
- struct fb_var_screeninfo *var)
-{
- if (var->bits_per_pixel == 0) {
- /* can only support 256 8x16 bitmap */
- caps->x = 1 << (8 - 1);
- caps->y = 1 << (16 - 1);
- caps->len = 256;
- } else {
- caps->x = ~(u32)0;
- caps->y = ~(u32)0;
- caps->len = ~(u32)0;
- }
-}
-
/* ------------------------------------------------------------------------- */
/* Frame buffer operations */
@@ -829,7 +812,7 @@ static struct fb_ops s3fb_ops = {
.fb_fillrect = s3fb_fillrect,
.fb_copyarea = cfb_copyarea,
.fb_imageblit = s3fb_imageblit,
- .fb_get_caps = s3fb_get_caps,
+ .fb_get_caps = svga_get_caps,
};
/* ------------------------------------------------------------------------- */
diff --git a/drivers/video/svgalib.c b/drivers/video/svgalib.c
index 079cdc911e4..25df928d37d 100644
--- a/drivers/video/svgalib.c
+++ b/drivers/video/svgalib.c
@@ -347,6 +347,23 @@ int svga_get_tilemax(struct fb_info *info)
return 256;
}
+/* Get capabilities of accelerator based on the mode */
+
+void svga_get_caps(struct fb_info *info, struct fb_blit_caps *caps,
+ struct fb_var_screeninfo *var)
+{
+ if (var->bits_per_pixel == 0) {
+ /* can only support 256 8x16 bitmap */
+ caps->x = 1 << (8 - 1);
+ caps->y = 1 << (16 - 1);
+ caps->len = 256;
+ } else {
+ caps->x = (var->bits_per_pixel == 4) ? 1 << (8 - 1) : ~(u32)0;
+ caps->y = ~(u32)0;
+ caps->len = ~(u32)0;
+ }
+}
+EXPORT_SYMBOL(svga_get_caps);
/* ------------------------------------------------------------------------- */
diff --git a/drivers/video/vt8623fb.c b/drivers/video/vt8623fb.c
new file mode 100644
index 00000000000..5e9755e464a
--- /dev/null
+++ b/drivers/video/vt8623fb.c
@@ -0,0 +1,927 @@
+/*
+ * linux/drivers/video/vt8623fb.c - fbdev driver for
+ * integrated graphic core in VIA VT8623 [CLE266] chipset
+ *
+ * Copyright (c) 2006-2007 Ondrej Zajicek <santiago@crfreenet.org>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive for
+ * more details.
+ *
+ * Code is based on s3fb, some parts are from David Boucher's viafb
+ * (http://davesdomain.org.uk/viafb/)
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/tty.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/fb.h>
+#include <linux/svga.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/console.h> /* Why should fb driver call console functions? because acquire_console_sem() */
+#include <video/vga.h>
+
+#ifdef CONFIG_MTRR
+#include <asm/mtrr.h>
+#endif
+
+struct vt8623fb_info {
+ char __iomem *mmio_base;
+ int mtrr_reg;
+ struct vgastate state;
+ struct mutex open_lock;
+ unsigned int ref_count;
+ u32 pseudo_palette[16];
+};
+
+
+
+/* ------------------------------------------------------------------------- */
+
+static const struct svga_fb_format vt8623fb_formats[] = {
+ { 0, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_TEXT, FB_AUX_TEXT_SVGA_STEP8, FB_VISUAL_PSEUDOCOLOR, 16, 16},
+ { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 16, 16},
+ { 4, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 1,
+ FB_TYPE_INTERLEAVED_PLANES, 1, FB_VISUAL_PSEUDOCOLOR, 16, 16},
+ { 8, {0, 6, 0}, {0, 6, 0}, {0, 6, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_PSEUDOCOLOR, 8, 8},
+/* {16, {10, 5, 0}, {5, 5, 0}, {0, 5, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4}, */
+ {16, {11, 5, 0}, {5, 6, 0}, {0, 5, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 4, 4},
+ {32, {16, 8, 0}, {8, 8, 0}, {0, 8, 0}, {0, 0, 0}, 0,
+ FB_TYPE_PACKED_PIXELS, 0, FB_VISUAL_TRUECOLOR, 2, 2},
+ SVGA_FORMAT_END
+};
+
+static const struct svga_pll vt8623_pll = {2, 127, 2, 7, 0, 3,
+ 60000, 300000, 14318};
+
+/* CRT timing register sets */
+
+struct vga_regset vt8623_h_total_regs[] = {{0x00, 0, 7}, {0x36, 3, 3}, VGA_REGSET_END};
+struct vga_regset vt8623_h_display_regs[] = {{0x01, 0, 7}, VGA_REGSET_END};
+struct vga_regset vt8623_h_blank_start_regs[] = {{0x02, 0, 7}, VGA_REGSET_END};
+struct vga_regset vt8623_h_blank_end_regs[] = {{0x03, 0, 4}, {0x05, 7, 7}, {0x33, 5, 5}, VGA_REGSET_END};
+struct vga_regset vt8623_h_sync_start_regs[] = {{0x04, 0, 7}, {0x33, 4, 4}, VGA_REGSET_END};
+struct vga_regset vt8623_h_sync_end_regs[] = {{0x05, 0, 4}, VGA_REGSET_END};
+
+struct vga_regset vt8623_v_total_regs[] = {{0x06, 0, 7}, {0x07, 0, 0}, {0x07, 5, 5}, {0x35, 0, 0}, VGA_REGSET_END};
+struct vga_regset vt8623_v_display_regs[] = {{0x12, 0, 7}, {0x07, 1, 1}, {0x07, 6, 6}, {0x35, 2, 2}, VGA_REGSET_END};
+struct vga_regset vt8623_v_blank_start_regs[] = {{0x15, 0, 7}, {0x07, 3, 3}, {0x09, 5, 5}, {0x35, 3, 3}, VGA_REGSET_END};
+struct vga_regset vt8623_v_blank_end_regs[] = {{0x16, 0, 7}, VGA_REGSET_END};
+struct vga_regset vt8623_v_sync_start_regs[] = {{0x10, 0, 7}, {0x07, 2, 2}, {0x07, 7, 7}, {0x35, 1, 1}, VGA_REGSET_END};
+struct vga_regset vt8623_v_sync_end_regs[] = {{0x11, 0, 3}, VGA_REGSET_END};
+
+struct vga_regset vt8623_offset_regs[] = {{0x13, 0, 7}, {0x35, 5, 7}, VGA_REGSET_END};
+struct vga_regset vt8623_line_compare_regs[] = {{0x18, 0, 7}, {0x07, 4, 4}, {0x09, 6, 6}, {0x33, 0, 2}, {0x35, 4, 4}, VGA_REGSET_END};
+struct vga_regset vt8623_fetch_count_regs[] = {{0x1C, 0, 7}, {0x1D, 0, 1}, VGA_REGSET_END};
+struct vga_regset vt8623_start_address_regs[] = {{0x0d, 0, 7}, {0x0c, 0, 7}, {0x34, 0, 7}, {0x48, 0, 1}, VGA_REGSET_END};
+
+struct svga_timing_regs vt8623_timing_regs = {
+ vt8623_h_total_regs, vt8623_h_display_regs, vt8623_h_blank_start_regs,
+ vt8623_h_blank_end_regs, vt8623_h_sync_start_regs, vt8623_h_sync_end_regs,
+ vt8623_v_total_regs, vt8623_v_display_regs, vt8623_v_blank_start_regs,
+ vt8623_v_blank_end_regs, vt8623_v_sync_start_regs, vt8623_v_sync_end_regs,
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* Module parameters */
+
+static char *mode = "640x480-8@60";
+
+#ifdef CONFIG_MTRR
+static int mtrr = 1;
+#endif
+
+MODULE_AUTHOR("(c) 2006 Ondrej Zajicek <santiago@crfreenet.org>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("fbdev driver for integrated graphics core in VIA VT8623 [CLE266]");
+
+module_param(mode, charp, 0644);
+MODULE_PARM_DESC(mode, "Default video mode ('640x480-8@60', etc)");
+
+#ifdef CONFIG_MTRR
+module_param(mtrr, int, 0444);
+MODULE_PARM_DESC(mtrr, "Enable write-combining with MTRR (1=enable, 0=disable, default=1)");
+#endif
+
+
+/* ------------------------------------------------------------------------- */
+
+
+static struct fb_tile_ops vt8623fb_tile_ops = {
+ .fb_settile = svga_settile,
+ .fb_tilecopy = svga_tilecopy,
+ .fb_tilefill = svga_tilefill,
+ .fb_tileblit = svga_tileblit,
+ .fb_tilecursor = svga_tilecursor,
+ .fb_get_tilemax = svga_get_tilemax,
+};
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* image data is MSB-first, fb structure is MSB-first too */
+static inline u32 expand_color(u32 c)
+{
+ return ((c & 1) | ((c & 2) << 7) | ((c & 4) << 14) | ((c & 8) << 21)) * 0xFF;
+}
+
+/* vt8623fb_iplan_imageblit silently assumes that almost everything is 8-pixel aligned */
+static void vt8623fb_iplan_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ u32 fg = expand_color(image->fg_color);
+ u32 bg = expand_color(image->bg_color);
+ const u8 *src1, *src;
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ u32 val;
+ int x, y;
+
+ src1 = image->data;
+ dst1 = info->screen_base + (image->dy * info->fix.line_length)
+ + ((image->dx / 8) * 4);
+
+ for (y = 0; y < image->height; y++) {
+ src = src1;
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < image->width; x += 8) {
+ val = *(src++) * 0x01010101;
+ val = (val & fg) | (~val & bg);
+ fb_writel(val, dst++);
+ }
+ src1 += image->width / 8;
+ dst1 += info->fix.line_length;
+ }
+}
+
+/* vt8623fb_iplan_fillrect silently assumes that almost everything is 8-pixel aligned */
+static void vt8623fb_iplan_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ u32 fg = expand_color(rect->color);
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ int x, y;
+
+ dst1 = info->screen_base + (rect->dy * info->fix.line_length)
+ + ((rect->dx / 8) * 4);
+
+ for (y = 0; y < rect->height; y++) {
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < rect->width; x += 8) {
+ fb_writel(fg, dst++);
+ }
+ dst1 += info->fix.line_length;
+ }
+}
+
+
+/* image data is MSB-first, fb structure is high-nibble-in-low-byte-first */
+static inline u32 expand_pixel(u32 c)
+{
+ return (((c & 1) << 24) | ((c & 2) << 27) | ((c & 4) << 14) | ((c & 8) << 17) |
+ ((c & 16) << 4) | ((c & 32) << 7) | ((c & 64) >> 6) | ((c & 128) >> 3)) * 0xF;
+}
+
+/* vt8623fb_cfb4_imageblit silently assumes that almost everything is 8-pixel aligned */
+static void vt8623fb_cfb4_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ u32 fg = image->fg_color * 0x11111111;
+ u32 bg = image->bg_color * 0x11111111;
+ const u8 *src1, *src;
+ u8 __iomem *dst1;
+ u32 __iomem *dst;
+ u32 val;
+ int x, y;
+
+ src1 = image->data;
+ dst1 = info->screen_base + (image->dy * info->fix.line_length)
+ + ((image->dx / 8) * 4);
+
+ for (y = 0; y < image->height; y++) {
+ src = src1;
+ dst = (u32 __iomem *) dst1;
+ for (x = 0; x < image->width; x += 8) {
+ val = expand_pixel(*(src++));
+ val = (val & fg) | (~val & bg);
+ fb_writel(val, dst++);
+ }
+ src1 += image->width / 8;
+ dst1 += info->fix.line_length;
+ }
+}
+
+static void vt8623fb_imageblit(struct fb_info *info, const struct fb_image *image)
+{
+ if ((info->var.bits_per_pixel == 4) && (image->depth == 1)
+ && ((image->width % 8) == 0) && ((image->dx % 8) == 0)) {
+ if (info->fix.type == FB_TYPE_INTERLEAVED_PLANES)
+ vt8623fb_iplan_imageblit(info, image);
+ else
+ vt8623fb_cfb4_imageblit(info, image);
+ } else
+ cfb_imageblit(info, image);
+}
+
+static void vt8623fb_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
+{
+ if ((info->var.bits_per_pixel == 4)
+ && ((rect->width % 8) == 0) && ((rect->dx % 8) == 0)
+ && (info->fix.type == FB_TYPE_INTERLEAVED_PLANES))
+ vt8623fb_iplan_fillrect(info, rect);
+ else
+ cfb_fillrect(info, rect);
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+static void vt8623_set_pixclock(struct fb_info *info, u32 pixclock)
+{
+ u16 m, n, r;
+ u8 regval;
+ int rv;
+
+ rv = svga_compute_pll(&vt8623_pll, 1000000000 / pixclock, &m, &n, &r, info->node);
+ if (rv < 0) {
+ printk(KERN_ERR "fb%d: cannot set requested pixclock, keeping old value\n", info->node);
+ return;
+ }
+
+ /* Set VGA misc register */
+ regval = vga_r(NULL, VGA_MIS_R);
+ vga_w(NULL, VGA_MIS_W, regval | VGA_MIS_ENB_PLL_LOAD);
+
+ /* Set clock registers */
+ vga_wseq(NULL, 0x46, (n | (r << 6)));
+ vga_wseq(NULL, 0x47, m);
+
+ udelay(1000);
+
+ /* PLL reset */
+ svga_wseq_mask(0x40, 0x02, 0x02);
+ svga_wseq_mask(0x40, 0x00, 0x02);
+}
+
+
+static int vt8623fb_open(struct fb_info *info, int user)
+{
+ struct vt8623fb_info *par = info->par;
+
+ mutex_lock(&(par->open_lock));
+ if (par->ref_count == 0) {
+ memset(&(par->state), 0, sizeof(struct vgastate));
+ par->state.flags = VGA_SAVE_MODE | VGA_SAVE_FONTS | VGA_SAVE_CMAP;
+ par->state.num_crtc = 0xA2;
+ par->state.num_seq = 0x50;
+ save_vga(&(par->state));
+ }
+
+ par->ref_count++;
+ mutex_unlock(&(par->open_lock));
+
+ return 0;
+}
+
+static int vt8623fb_release(struct fb_info *info, int user)
+{
+ struct vt8623fb_info *par = info->par;
+
+ mutex_lock(&(par->open_lock));
+ if (par->ref_count == 0) {
+ mutex_unlock(&(par->open_lock));
+ return -EINVAL;
+ }
+
+ if (par->ref_count == 1)
+ restore_vga(&(par->state));
+
+ par->ref_count--;
+ mutex_unlock(&(par->open_lock));
+
+ return 0;
+}
+
+static int vt8623fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ int rv, mem, step;
+
+ /* Find appropriate format */
+ rv = svga_match_format (vt8623fb_formats, var, NULL);
+ if (rv < 0)
+ {
+ printk(KERN_ERR "fb%d: unsupported mode requested\n", info->node);
+ return rv;
+ }
+
+ /* Do not allow to have real resoulution larger than virtual */
+ if (var->xres > var->xres_virtual)
+ var->xres_virtual = var->xres;
+
+ if (var->yres > var->yres_virtual)
+ var->yres_virtual = var->yres;
+
+ /* Round up xres_virtual to have proper alignment of lines */
+ step = vt8623fb_formats[rv].xresstep - 1;
+ var->xres_virtual = (var->xres_virtual+step) & ~step;
+
+ /* Check whether have enough memory */
+ mem = ((var->bits_per_pixel * var->xres_virtual) >> 3) * var->yres_virtual;
+ if (mem > info->screen_size)
+ {
+ printk(KERN_ERR "fb%d: not enough framebuffer memory (%d kB requested , %d kB available)\n", info->node, mem >> 10, (unsigned int) (info->screen_size >> 10));
+ return -EINVAL;
+ }
+
+ /* Text mode is limited to 256 kB of memory */
+ if ((var->bits_per_pixel == 0) && (mem > (256*1024)))
+ {
+ printk(KERN_ERR "fb%d: text framebuffer size too large (%d kB requested, 256 kB possible)\n", info->node, mem >> 10);
+ return -EINVAL;
+ }
+
+ rv = svga_check_timings (&vt8623_timing_regs, var, info->node);
+ if (rv < 0)
+ {
+ printk(KERN_ERR "fb%d: invalid timings requested\n", info->node);
+ return rv;
+ }
+
+ /* Interlaced mode not supported */
+ if (var->vmode & FB_VMODE_INTERLACED)
+ return -EINVAL;
+
+ return 0;
+}
+
+
+static int vt8623fb_set_par(struct fb_info *info)
+{
+ u32 mode, offset_value, fetch_value, screen_size;
+ u32 bpp = info->var.bits_per_pixel;
+
+ if (bpp != 0) {
+ info->fix.ypanstep = 1;
+ info->fix.line_length = (info->var.xres_virtual * bpp) / 8;
+
+ info->flags &= ~FBINFO_MISC_TILEBLITTING;
+ info->tileops = NULL;
+
+ /* in 4bpp supports 8p wide tiles only, any tiles otherwise */
+ info->pixmap.blit_x = (bpp == 4) ? (1 << (8 - 1)) : (~(u32)0);
+ info->pixmap.blit_y = ~(u32)0;
+
+ offset_value = (info->var.xres_virtual * bpp) / 64;
+ fetch_value = ((info->var.xres * bpp) / 128) + 4;
+
+ if (bpp == 4)
+ fetch_value = (info->var.xres / 8) + 8; /* + 0 is OK */
+
+ screen_size = info->var.yres_virtual * info->fix.line_length;
+ } else {
+ info->fix.ypanstep = 16;
+ info->fix.line_length = 0;
+
+ info->flags |= FBINFO_MISC_TILEBLITTING;
+ info->tileops = &vt8623fb_tile_ops;
+
+ /* supports 8x16 tiles only */
+ info->pixmap.blit_x = 1 << (8 - 1);
+ info->pixmap.blit_y = 1 << (16 - 1);
+
+ offset_value = info->var.xres_virtual / 16;
+ fetch_value = (info->var.xres / 8) + 8;
+ screen_size = (info->var.xres_virtual * info->var.yres_virtual) / 64;
+ }
+
+ info->var.xoffset = 0;
+ info->var.yoffset = 0;
+ info->var.activate = FB_ACTIVATE_NOW;
+
+ /* Unlock registers */
+ svga_wseq_mask(0x10, 0x01, 0x01);
+ svga_wcrt_mask(0x11, 0x00, 0x80);
+ svga_wcrt_mask(0x47, 0x00, 0x01);
+
+ /* Device, screen and sync off */
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ svga_wcrt_mask(0x36, 0x30, 0x30);
+ svga_wcrt_mask(0x17, 0x00, 0x80);
+
+ /* Set default values */
+ svga_set_default_gfx_regs();
+ svga_set_default_atc_regs();
+ svga_set_default_seq_regs();
+ svga_set_default_crt_regs();
+ svga_wcrt_multi(vt8623_line_compare_regs, 0xFFFFFFFF);
+ svga_wcrt_multi(vt8623_start_address_regs, 0);
+
+ svga_wcrt_multi(vt8623_offset_regs, offset_value);
+ svga_wseq_multi(vt8623_fetch_count_regs, fetch_value);
+
+ if (info->var.vmode & FB_VMODE_DOUBLE)
+ svga_wcrt_mask(0x09, 0x80, 0x80);
+ else
+ svga_wcrt_mask(0x09, 0x00, 0x80);
+
+ svga_wseq_mask(0x1E, 0xF0, 0xF0); // DI/DVP bus
+ svga_wseq_mask(0x2A, 0x0F, 0x0F); // DI/DVP bus
+ svga_wseq_mask(0x16, 0x08, 0xBF); // FIFO read treshold
+ vga_wseq(NULL, 0x17, 0x1F); // FIFO depth
+ vga_wseq(NULL, 0x18, 0x4E);
+ svga_wseq_mask(0x1A, 0x08, 0x08); // enable MMIO ?
+
+ vga_wcrt(NULL, 0x32, 0x00);
+ vga_wcrt(NULL, 0x34, 0x00);
+ vga_wcrt(NULL, 0x6A, 0x80);
+ vga_wcrt(NULL, 0x6A, 0xC0);
+
+ vga_wgfx(NULL, 0x20, 0x00);
+ vga_wgfx(NULL, 0x21, 0x00);
+ vga_wgfx(NULL, 0x22, 0x00);
+
+ /* Set SR15 according to number of bits per pixel */
+ mode = svga_match_format(vt8623fb_formats, &(info->var), &(info->fix));
+ switch (mode) {
+ case 0:
+ pr_debug("fb%d: text mode\n", info->node);
+ svga_set_textmode_vga_regs();
+ svga_wseq_mask(0x15, 0x00, 0xFE);
+ svga_wcrt_mask(0x11, 0x60, 0x70);
+ break;
+ case 1:
+ pr_debug("fb%d: 4 bit pseudocolor\n", info->node);
+ vga_wgfx(NULL, VGA_GFX_MODE, 0x40);
+ svga_wseq_mask(0x15, 0x20, 0xFE);
+ svga_wcrt_mask(0x11, 0x00, 0x70);
+ break;
+ case 2:
+ pr_debug("fb%d: 4 bit pseudocolor, planar\n", info->node);
+ svga_wseq_mask(0x15, 0x00, 0xFE);
+ svga_wcrt_mask(0x11, 0x00, 0x70);
+ break;
+ case 3:
+ pr_debug("fb%d: 8 bit pseudocolor\n", info->node);
+ svga_wseq_mask(0x15, 0x22, 0xFE);
+ break;
+ case 4:
+ pr_debug("fb%d: 5/6/5 truecolor\n", info->node);
+ svga_wseq_mask(0x15, 0xB6, 0xFE);
+ break;
+ case 5:
+ pr_debug("fb%d: 8/8/8 truecolor\n", info->node);
+ svga_wseq_mask(0x15, 0xAE, 0xFE);
+ break;
+ default:
+ printk(KERN_ERR "vt8623fb: unsupported mode - bug\n");
+ return (-EINVAL);
+ }
+
+ vt8623_set_pixclock(info, info->var.pixclock);
+ svga_set_timings(&vt8623_timing_regs, &(info->var), 1, 1,
+ (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, 1,
+ 1, info->node);
+
+ memset_io(info->screen_base, 0x00, screen_size);
+
+ /* Device and screen back on */
+ svga_wcrt_mask(0x17, 0x80, 0x80);
+ svga_wcrt_mask(0x36, 0x00, 0x30);
+ svga_wseq_mask(0x01, 0x00, 0x20);
+
+ return 0;
+}
+
+
+static int vt8623fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
+ u_int transp, struct fb_info *fb)
+{
+ switch (fb->var.bits_per_pixel) {
+ case 0:
+ case 4:
+ if (regno >= 16)
+ return -EINVAL;
+
+ outb(0x0F, VGA_PEL_MSK);
+ outb(regno, VGA_PEL_IW);
+ outb(red >> 10, VGA_PEL_D);
+ outb(green >> 10, VGA_PEL_D);
+ outb(blue >> 10, VGA_PEL_D);
+ break;
+ case 8:
+ if (regno >= 256)
+ return -EINVAL;
+
+ outb(0xFF, VGA_PEL_MSK);
+ outb(regno, VGA_PEL_IW);
+ outb(red >> 10, VGA_PEL_D);
+ outb(green >> 10, VGA_PEL_D);
+ outb(blue >> 10, VGA_PEL_D);
+ break;
+ case 16:
+ if (regno >= 16)
+ return 0;
+
+ if (fb->var.green.length == 5)
+ ((u32*)fb->pseudo_palette)[regno] = ((red & 0xF800) >> 1) |
+ ((green & 0xF800) >> 6) | ((blue & 0xF800) >> 11);
+ else if (fb->var.green.length == 6)
+ ((u32*)fb->pseudo_palette)[regno] = (red & 0xF800) |
+ ((green & 0xFC00) >> 5) | ((blue & 0xF800) >> 11);
+ else
+ return -EINVAL;
+ break;
+ case 24:
+ case 32:
+ if (regno >= 16)
+ return 0;
+
+ /* ((transp & 0xFF00) << 16) */
+ ((u32*)fb->pseudo_palette)[regno] = ((red & 0xFF00) << 8) |
+ (green & 0xFF00) | ((blue & 0xFF00) >> 8);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int vt8623fb_blank(int blank_mode, struct fb_info *info)
+{
+ switch (blank_mode) {
+ case FB_BLANK_UNBLANK:
+ pr_debug("fb%d: unblank\n", info->node);
+ svga_wcrt_mask(0x36, 0x00, 0x30);
+ svga_wseq_mask(0x01, 0x00, 0x20);
+ break;
+ case FB_BLANK_NORMAL:
+ pr_debug("fb%d: blank\n", info->node);
+ svga_wcrt_mask(0x36, 0x00, 0x30);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ break;
+ case FB_BLANK_HSYNC_SUSPEND:
+ pr_debug("fb%d: DPMS standby (hsync off)\n", info->node);
+ svga_wcrt_mask(0x36, 0x10, 0x30);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ break;
+ case FB_BLANK_VSYNC_SUSPEND:
+ pr_debug("fb%d: DPMS suspend (vsync off)\n", info->node);
+ svga_wcrt_mask(0x36, 0x20, 0x30);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ break;
+ case FB_BLANK_POWERDOWN:
+ pr_debug("fb%d: DPMS off (no sync)\n", info->node);
+ svga_wcrt_mask(0x36, 0x30, 0x30);
+ svga_wseq_mask(0x01, 0x20, 0x20);
+ break;
+ }
+
+ return 0;
+}
+
+
+static int vt8623fb_pan_display(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ unsigned int offset;
+
+ /* Calculate the offset */
+ if (var->bits_per_pixel == 0) {
+ offset = (var->yoffset / 16) * var->xres_virtual + var->xoffset;
+ offset = offset >> 3;
+ } else {
+ offset = (var->yoffset * info->fix.line_length) +
+ (var->xoffset * var->bits_per_pixel / 8);
+ offset = offset >> ((var->bits_per_pixel == 4) ? 2 : 1);
+ }
+
+ /* Set the offset */
+ svga_wcrt_multi(vt8623_start_address_regs, offset);
+
+ return 0;
+}
+
+
+/* ------------------------------------------------------------------------- */
+
+
+/* Frame buffer operations */
+
+static struct fb_ops vt8623fb_ops = {
+ .owner = THIS_MODULE,
+ .fb_open = vt8623fb_open,
+ .fb_release = vt8623fb_release,
+ .fb_check_var = vt8623fb_check_var,
+ .fb_set_par = vt8623fb_set_par,
+ .fb_setcolreg = vt8623fb_setcolreg,
+ .fb_blank = vt8623fb_blank,
+ .fb_pan_display = vt8623fb_pan_display,
+ .fb_fillrect = vt8623fb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = vt8623fb_imageblit,
+ .fb_get_caps = svga_get_caps,
+};
+
+
+/* PCI probe */
+
+static int __devinit vt8623_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
+{
+ struct fb_info *info;
+ struct vt8623fb_info *par;
+ unsigned int memsize1, memsize2;
+ int rc;
+
+ /* Ignore secondary VGA device because there is no VGA arbitration */
+ if (! svga_primary_device(dev)) {
+ dev_info(&(dev->dev), "ignoring secondary device\n");
+ return -ENODEV;
+ }
+
+ /* Allocate and fill driver data structure */
+ info = framebuffer_alloc(sizeof(struct vt8623fb_info), NULL);
+ if (! info) {
+ dev_err(&(dev->dev), "cannot allocate memory\n");
+ return -ENOMEM;
+ }
+
+ par = info->par;
+ mutex_init(&par->open_lock);
+
+ info->flags = FBINFO_PARTIAL_PAN_OK | FBINFO_HWACCEL_YPAN;
+ info->fbops = &vt8623fb_ops;
+
+ /* Prepare PCI device */
+
+ rc = pci_enable_device(dev);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot enable PCI device\n");
+ goto err_enable_device;
+ }
+
+ rc = pci_request_regions(dev, "vt8623fb");
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot reserve framebuffer region\n");
+ goto err_request_regions;
+ }
+
+ info->fix.smem_start = pci_resource_start(dev, 0);
+ info->fix.smem_len = pci_resource_len(dev, 0);
+ info->fix.mmio_start = pci_resource_start(dev, 1);
+ info->fix.mmio_len = pci_resource_len(dev, 1);
+
+ /* Map physical IO memory address into kernel space */
+ info->screen_base = pci_iomap(dev, 0, 0);
+ if (! info->screen_base) {
+ rc = -ENOMEM;
+ dev_err(&(dev->dev), "iomap for framebuffer failed\n");
+ goto err_iomap_1;
+ }
+
+ par->mmio_base = pci_iomap(dev, 1, 0);
+ if (! par->mmio_base) {
+ rc = -ENOMEM;
+ dev_err(&(dev->dev), "iomap for MMIO failed\n");
+ goto err_iomap_2;
+ }
+
+ /* Find how many physical memory there is on card */
+ memsize1 = (vga_rseq(NULL, 0x34) + 1) >> 1;
+ memsize2 = vga_rseq(NULL, 0x39) << 2;
+
+ if ((16 <= memsize1) && (memsize1 <= 64) && (memsize1 == memsize2))
+ info->screen_size = memsize1 << 20;
+ else {
+ dev_err(&(dev->dev), "memory size detection failed (%x %x), suppose 16 MB\n", memsize1, memsize2);
+ info->screen_size = 16 << 20;
+ }
+
+ info->fix.smem_len = info->screen_size;
+ strcpy(info->fix.id, "VIA VT8623");
+ info->fix.type = FB_TYPE_PACKED_PIXELS;
+ info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
+ info->fix.ypanstep = 0;
+ info->fix.accel = FB_ACCEL_NONE;
+ info->pseudo_palette = (void*)par->pseudo_palette;
+
+ /* Prepare startup mode */
+
+ rc = fb_find_mode(&(info->var), info, mode, NULL, 0, NULL, 8);
+ if (! ((rc == 1) || (rc == 2))) {
+ rc = -EINVAL;
+ dev_err(&(dev->dev), "mode %s not found\n", mode);
+ goto err_find_mode;
+ }
+
+ rc = fb_alloc_cmap(&info->cmap, 256, 0);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot allocate colormap\n");
+ goto err_alloc_cmap;
+ }
+
+ rc = register_framebuffer(info);
+ if (rc < 0) {
+ dev_err(&(dev->dev), "cannot register framebugger\n");
+ goto err_reg_fb;
+ }
+
+ printk(KERN_INFO "fb%d: %s on %s, %d MB RAM\n", info->node, info->fix.id,
+ pci_name(dev), info->fix.smem_len >> 20);
+
+ /* Record a reference to the driver data */
+ pci_set_drvdata(dev, info);
+
+#ifdef CONFIG_MTRR
+ if (mtrr) {
+ par->mtrr_reg = -1;
+ par->mtrr_reg = mtrr_add(info->fix.smem_start, info->fix.smem_len, MTRR_TYPE_WRCOMB, 1);
+ }
+#endif
+
+ return 0;
+
+ /* Error handling */
+err_reg_fb:
+ fb_dealloc_cmap(&info->cmap);
+err_alloc_cmap:
+err_find_mode:
+ pci_iounmap(dev, par->mmio_base);
+err_iomap_2:
+ pci_iounmap(dev, info->screen_base);
+err_iomap_1:
+ pci_release_regions(dev);
+err_request_regions:
+/* pci_disable_device(dev); */
+err_enable_device:
+ framebuffer_release(info);
+ return rc;
+}
+
+/* PCI remove */
+
+static void __devexit vt8623_pci_remove(struct pci_dev *dev)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct vt8623fb_info *par = info->par;
+
+ if (info) {
+#ifdef CONFIG_MTRR
+ if (par->mtrr_reg >= 0) {
+ mtrr_del(par->mtrr_reg, 0, 0);
+ par->mtrr_reg = -1;
+ }
+#endif
+
+ unregister_framebuffer(info);
+ fb_dealloc_cmap(&info->cmap);
+
+ pci_iounmap(dev, info->screen_base);
+ pci_iounmap(dev, par->mmio_base);
+ pci_release_regions(dev);
+/* pci_disable_device(dev); */
+
+ pci_set_drvdata(dev, NULL);
+ framebuffer_release(info);
+ }
+}
+
+
+#ifdef CONFIG_PM
+/* PCI suspend */
+
+static int vt8623_pci_suspend(struct pci_dev* dev, pm_message_t state)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct vt8623fb_info *par = info->par;
+
+ dev_info(&(dev->dev), "suspend\n");
+
+ acquire_console_sem();
+ mutex_lock(&(par->open_lock));
+
+ if ((state.event == PM_EVENT_FREEZE) || (par->ref_count == 0)) {
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+ return 0;
+ }
+
+ fb_set_suspend(info, 1);
+
+ pci_save_state(dev);
+ pci_disable_device(dev);
+ pci_set_power_state(dev, pci_choose_state(dev, state));
+
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+
+ return 0;
+}
+
+
+/* PCI resume */
+
+static int vt8623_pci_resume(struct pci_dev* dev)
+{
+ struct fb_info *info = pci_get_drvdata(dev);
+ struct vt8623fb_info *par = info->par;
+
+ dev_info(&(dev->dev), "resume\n");
+
+ acquire_console_sem();
+ mutex_lock(&(par->open_lock));
+
+ if (par->ref_count == 0) {
+ mutex_unlock(&(par->open_lock));
+ release_console_sem();
+ return 0;
+ }
+
+ pci_set_power_state(dev, PCI_D0);
+ pci_restore_state(dev);
+
+ if (pci_enable_device(dev))
+ goto fail;
+
+ pci_set_master(dev);
+
+ vt8623fb_set_par(info);
+ fb_set_suspend(info, 0);
+
+ mutex_unlock(&(par->open_lock));
+fail:
+ release_console_sem();
+
+ return 0;
+}
+#else
+#define vt8623_pci_suspend NULL
+#define vt8623_pci_resume NULL
+#endif /* CONFIG_PM */
+
+/* List of boards that we are trying to support */
+
+static struct pci_device_id vt8623_devices[] __devinitdata = {
+ {PCI_DEVICE(PCI_VENDOR_ID_VIA, 0x3122)},
+ {0, 0, 0, 0, 0, 0, 0}
+};
+
+MODULE_DEVICE_TABLE(pci, vt8623_devices);
+
+static struct pci_driver vt8623fb_pci_driver = {
+ .name = "vt8623fb",
+ .id_table = vt8623_devices,
+ .probe = vt8623_pci_probe,
+ .remove = __devexit_p(vt8623_pci_remove),
+ .suspend = vt8623_pci_suspend,
+ .resume = vt8623_pci_resume,
+};
+
+/* Cleanup */
+
+static void __exit vt8623fb_cleanup(void)
+{
+ pr_debug("vt8623fb: cleaning up\n");
+ pci_unregister_driver(&vt8623fb_pci_driver);
+}
+
+/* Driver Initialisation */
+
+int __init vt8623fb_init(void)
+{
+
+#ifndef MODULE
+ char *option = NULL;
+
+ if (fb_get_options("vt8623fb", &option))
+ return -ENODEV;
+
+ if (option && *option)
+ mode = option;
+#endif
+
+ pr_debug("vt8623fb: initializing\n");
+ return pci_register_driver(&vt8623fb_pci_driver);
+}
+
+/* ------------------------------------------------------------------------- */
+
+/* Modularization */
+
+module_init(vt8623fb_init);
+module_exit(vt8623fb_cleanup);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 4aa8079e71b..c8796906f58 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -628,11 +628,7 @@ static int affs_prepare_write_ofs(struct file *file, struct page *page, unsigned
return err;
}
if (to < PAGE_CACHE_SIZE) {
- char *kaddr = kmap_atomic(page, KM_USER0);
-
- memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, to, PAGE_CACHE_SIZE - to, KM_USER0);
if (size > offset + to) {
if (size < offset + PAGE_CACHE_SIZE)
tmp = size & ~PAGE_CACHE_MASK;
diff --git a/fs/afs/Makefile b/fs/afs/Makefile
index cf83e5d6351..73ce561f3ea 100644
--- a/fs/afs/Makefile
+++ b/fs/afs/Makefile
@@ -22,6 +22,7 @@ kafs-objs := \
vlclient.o \
vlocation.o \
vnode.o \
- volume.o
+ volume.o \
+ write.o
obj-$(CONFIG_AFS_FS) := kafs.o
diff --git a/fs/afs/afs_fs.h b/fs/afs/afs_fs.h
index 89e0d1650a7..2198006d2d0 100644
--- a/fs/afs/afs_fs.h
+++ b/fs/afs/afs_fs.h
@@ -18,6 +18,8 @@
enum AFS_FS_Operations {
FSFETCHDATA = 130, /* AFS Fetch file data */
FSFETCHSTATUS = 132, /* AFS Fetch file status */
+ FSSTOREDATA = 133, /* AFS Store file data */
+ FSSTORESTATUS = 135, /* AFS Store file status */
FSREMOVEFILE = 136, /* AFS Remove a file */
FSCREATEFILE = 137, /* AFS Create a file */
FSRENAME = 138, /* AFS Rename or move a file or directory */
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 9bdbf36a9aa..f64e40fefc0 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -44,7 +44,7 @@ void afs_init_callback_state(struct afs_server *server)
while (!RB_EMPTY_ROOT(&server->cb_promises)) {
vnode = rb_entry(server->cb_promises.rb_node,
struct afs_vnode, cb_promise);
- _debug("UNPROMISE { vid=%x vn=%u uq=%u}",
+ _debug("UNPROMISE { vid=%x:%u uq=%u}",
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
rb_erase(&vnode->cb_promise, &server->cb_promises);
vnode->cb_promised = false;
@@ -84,11 +84,8 @@ void afs_broken_callback_work(struct work_struct *work)
/* if the vnode's data version number changed then its contents
* are different */
- if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- _debug("zap data {%x:%u}",
- vnode->fid.vid, vnode->fid.vnode);
- invalidate_remote_inode(&vnode->vfs_inode);
- }
+ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ afs_zap_data(vnode);
}
out:
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0c1e902f17a..2fb31276196 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -55,7 +55,8 @@ const struct inode_operations afs_dir_inode_operations = {
.rmdir = afs_rmdir,
.rename = afs_rename,
.permission = afs_permission,
- .getattr = afs_inode_getattr,
+ .getattr = afs_getattr,
+ .setattr = afs_setattr,
};
static struct dentry_operations afs_fs_dentry_operations = {
@@ -491,7 +492,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry,
vnode = AFS_FS_I(dir);
- _enter("{%x:%d},%p{%s},",
+ _enter("{%x:%u},%p{%s},",
vnode->fid.vid, vnode->fid.vnode, dentry, dentry->d_name.name);
ASSERTCMP(dentry->d_inode, ==, NULL);
@@ -731,7 +732,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%s},%o",
+ _enter("{%x:%u},{%s},%o",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
ret = -ENAMETOOLONG;
@@ -796,7 +797,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%s}",
+ _enter("{%x:%u},{%s}",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
ret = -ENAMETOOLONG;
@@ -842,7 +843,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%s}",
+ _enter("{%x:%u},{%s}",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name);
ret = -ENAMETOOLONG;
@@ -916,7 +917,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, int mode,
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%s},%o,",
+ _enter("{%x:%u},{%s},%o,",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name, mode);
ret = -ENAMETOOLONG;
@@ -983,7 +984,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
vnode = AFS_FS_I(from->d_inode);
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%x:%d},{%s}",
+ _enter("{%x:%u},{%x:%u},{%s}",
vnode->fid.vid, vnode->fid.vnode,
dvnode->fid.vid, dvnode->fid.vnode,
dentry->d_name.name);
@@ -1032,7 +1033,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
dvnode = AFS_FS_I(dir);
- _enter("{%x:%d},{%s},%s",
+ _enter("{%x:%u},{%s},%s",
dvnode->fid.vid, dvnode->fid.vnode, dentry->d_name.name,
content);
@@ -1104,7 +1105,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
orig_dvnode = AFS_FS_I(old_dir);
new_dvnode = AFS_FS_I(new_dir);
- _enter("{%x:%d},{%x:%d},{%x:%d},{%s}",
+ _enter("{%x:%u},{%x:%u},{%x:%u},{%s}",
orig_dvnode->fid.vid, orig_dvnode->fid.vnode,
vnode->fid.vid, vnode->fid.vnode,
new_dvnode->fid.vid, new_dvnode->fid.vnode,
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ae256498f4f..3e25795e5a4 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -15,32 +15,43 @@
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
+#include <linux/writeback.h>
#include "internal.h"
-static int afs_file_readpage(struct file *file, struct page *page);
-static void afs_file_invalidatepage(struct page *page, unsigned long offset);
-static int afs_file_releasepage(struct page *page, gfp_t gfp_flags);
+static int afs_readpage(struct file *file, struct page *page);
+static void afs_invalidatepage(struct page *page, unsigned long offset);
+static int afs_releasepage(struct page *page, gfp_t gfp_flags);
+static int afs_launder_page(struct page *page);
const struct file_operations afs_file_operations = {
.open = afs_open,
.release = afs_release,
.llseek = generic_file_llseek,
.read = do_sync_read,
+ .write = do_sync_write,
.aio_read = generic_file_aio_read,
+ .aio_write = afs_file_write,
.mmap = generic_file_readonly_mmap,
.sendfile = generic_file_sendfile,
+ .fsync = afs_fsync,
};
const struct inode_operations afs_file_inode_operations = {
- .getattr = afs_inode_getattr,
+ .getattr = afs_getattr,
+ .setattr = afs_setattr,
.permission = afs_permission,
};
const struct address_space_operations afs_fs_aops = {
- .readpage = afs_file_readpage,
- .set_page_dirty = __set_page_dirty_nobuffers,
- .releasepage = afs_file_releasepage,
- .invalidatepage = afs_file_invalidatepage,
+ .readpage = afs_readpage,
+ .set_page_dirty = afs_set_page_dirty,
+ .launder_page = afs_launder_page,
+ .releasepage = afs_releasepage,
+ .invalidatepage = afs_invalidatepage,
+ .prepare_write = afs_prepare_write,
+ .commit_write = afs_commit_write,
+ .writepage = afs_writepage,
+ .writepages = afs_writepages,
};
/*
@@ -52,7 +63,7 @@ int afs_open(struct inode *inode, struct file *file)
struct key *key;
int ret;
- _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key)) {
@@ -78,7 +89,7 @@ int afs_release(struct inode *inode, struct file *file)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
- _enter("{%x:%x},", vnode->fid.vid, vnode->fid.vnode);
+ _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
key_put(file->private_data);
_leave(" = 0");
@@ -89,10 +100,10 @@ int afs_release(struct inode *inode, struct file *file)
* deal with notification that a page was read from the cache
*/
#ifdef AFS_CACHING_SUPPORT
-static void afs_file_readpage_read_complete(void *cookie_data,
- struct page *page,
- void *data,
- int error)
+static void afs_readpage_read_complete(void *cookie_data,
+ struct page *page,
+ void *data,
+ int error)
{
_enter("%p,%p,%p,%d", cookie_data, page, data, error);
@@ -109,10 +120,10 @@ static void afs_file_readpage_read_complete(void *cookie_data,
* deal with notification that a page was written to the cache
*/
#ifdef AFS_CACHING_SUPPORT
-static void afs_file_readpage_write_complete(void *cookie_data,
- struct page *page,
- void *data,
- int error)
+static void afs_readpage_write_complete(void *cookie_data,
+ struct page *page,
+ void *data,
+ int error)
{
_enter("%p,%p,%p,%d", cookie_data, page, data, error);
@@ -121,9 +132,9 @@ static void afs_file_readpage_write_complete(void *cookie_data,
#endif
/*
- * AFS read page from file (or symlink)
+ * AFS read page from file, directory or symlink
*/
-static int afs_file_readpage(struct file *file, struct page *page)
+static int afs_readpage(struct file *file, struct page *page)
{
struct afs_vnode *vnode;
struct inode *inode;
@@ -219,39 +230,17 @@ error:
}
/*
- * get a page cookie for the specified page
- */
-#ifdef AFS_CACHING_SUPPORT
-int afs_cache_get_page_cookie(struct page *page,
- struct cachefs_page **_page_cookie)
-{
- int ret;
-
- _enter("");
- ret = cachefs_page_get_private(page,_page_cookie, GFP_NOIO);
-
- _leave(" = %d", ret);
- return ret;
-}
-#endif
-
-/*
* invalidate part or all of a page
*/
-static void afs_file_invalidatepage(struct page *page, unsigned long offset)
+static void afs_invalidatepage(struct page *page, unsigned long offset)
{
int ret = 1;
- _enter("{%lu},%lu", page->index, offset);
+ kenter("{%lu},%lu", page->index, offset);
BUG_ON(!PageLocked(page));
if (PagePrivate(page)) {
-#ifdef AFS_CACHING_SUPPORT
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- cachefs_uncache_page(vnode->cache,page);
-#endif
-
/* We release buffers only if the entire page is being
* invalidated.
* The get_block cached value has been unconditionally
@@ -272,25 +261,33 @@ static void afs_file_invalidatepage(struct page *page, unsigned long offset)
}
/*
+ * write back a dirty page
+ */
+static int afs_launder_page(struct page *page)
+{
+ _enter("{%lu}", page->index);
+
+ return 0;
+}
+
+/*
* release a page and cleanup its private data
*/
-static int afs_file_releasepage(struct page *page, gfp_t gfp_flags)
+static int afs_releasepage(struct page *page, gfp_t gfp_flags)
{
- struct cachefs_page *pageio;
+ struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
+ struct afs_writeback *wb;
- _enter("{%lu},%x", page->index, gfp_flags);
+ _enter("{{%x:%u}[%lu],%lx},%x",
+ vnode->fid.vid, vnode->fid.vnode, page->index, page->flags,
+ gfp_flags);
if (PagePrivate(page)) {
-#ifdef AFS_CACHING_SUPPORT
- struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
- cachefs_uncache_page(vnode->cache, page);
-#endif
-
- pageio = (struct cachefs_page *) page_private(page);
+ wb = (struct afs_writeback *) page_private(page);
+ ASSERT(wb != NULL);
set_page_private(page, 0);
ClearPagePrivate(page);
-
- kfree(pageio);
+ afs_put_writeback(wb);
}
_leave(" = 0");
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index e54e6c2ad34..025b1903d9e 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -33,8 +33,10 @@ static void xdr_decode_AFSFid(const __be32 **_bp, struct afs_fid *fid)
*/
static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
struct afs_file_status *status,
- struct afs_vnode *vnode)
+ struct afs_vnode *vnode,
+ afs_dataversion_t *store_version)
{
+ afs_dataversion_t expected_version;
const __be32 *bp = *_bp;
umode_t mode;
u64 data_version, size;
@@ -101,7 +103,11 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_atime = vnode->vfs_inode.i_ctime;
}
- if (status->data_version != data_version) {
+ expected_version = status->data_version;
+ if (store_version)
+ expected_version = *store_version;
+
+ if (expected_version != data_version) {
status->data_version = data_version;
if (vnode && !test_bit(AFS_VNODE_UNSET, &vnode->flags)) {
_debug("vnode modified %llx on {%x:%u}",
@@ -110,6 +116,8 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
set_bit(AFS_VNODE_MODIFIED, &vnode->flags);
set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags);
}
+ } else if (store_version) {
+ status->data_version = data_version;
}
}
@@ -156,6 +164,44 @@ static void xdr_decode_AFSVolSync(const __be32 **_bp,
}
/*
+ * encode the requested attributes into an AFSStoreStatus block
+ */
+static void xdr_encode_AFS_StoreStatus(__be32 **_bp, struct iattr *attr)
+{
+ __be32 *bp = *_bp;
+ u32 mask = 0, mtime = 0, owner = 0, group = 0, mode = 0;
+
+ mask = 0;
+ if (attr->ia_valid & ATTR_MTIME) {
+ mask |= AFS_SET_MTIME;
+ mtime = attr->ia_mtime.tv_sec;
+ }
+
+ if (attr->ia_valid & ATTR_UID) {
+ mask |= AFS_SET_OWNER;
+ owner = attr->ia_uid;
+ }
+
+ if (attr->ia_valid & ATTR_GID) {
+ mask |= AFS_SET_GROUP;
+ group = attr->ia_gid;
+ }
+
+ if (attr->ia_valid & ATTR_MODE) {
+ mask |= AFS_SET_MODE;
+ mode = attr->ia_mode & S_IALLUGO;
+ }
+
+ *bp++ = htonl(mask);
+ *bp++ = htonl(mtime);
+ *bp++ = htonl(owner);
+ *bp++ = htonl(group);
+ *bp++ = htonl(mode);
+ *bp++ = 0; /* segment size */
+ *_bp = bp;
+}
+
+/*
* deliver reply data to an FS.FetchStatus
*/
static int afs_deliver_fs_fetch_status(struct afs_call *call,
@@ -175,7 +221,7 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
xdr_decode_AFSCallBack(&bp, vnode);
if (call->reply2)
xdr_decode_AFSVolSync(&bp, call->reply2);
@@ -206,7 +252,7 @@ int afs_fs_fetch_file_status(struct afs_server *server,
struct afs_call *call;
__be32 *bp;
- _enter(",%x,{%x:%d},,",
+ _enter(",%x,{%x:%u},,",
key_serial(key), vnode->fid.vid, vnode->fid.vnode);
call = afs_alloc_flat_call(&afs_RXFSFetchStatus, 16, (21 + 3 + 6) * 4);
@@ -265,25 +311,20 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
call->offset = 0;
call->unmarshall++;
- if (call->count < PAGE_SIZE) {
- page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
- memset(buffer + PAGE_SIZE - call->count, 0,
- call->count);
- kunmap_atomic(buffer, KM_USER0);
- }
-
/* extract the returned data */
case 2:
_debug("extract data");
- page = call->reply3;
- buffer = kmap_atomic(page, KM_USER0);
- ret = afs_extract_data(call, skb, last, buffer, call->count);
- kunmap_atomic(buffer, KM_USER0);
- switch (ret) {
- case 0: break;
- case -EAGAIN: return 0;
- default: return ret;
+ if (call->count > 0) {
+ page = call->reply3;
+ buffer = kmap_atomic(page, KM_USER0);
+ ret = afs_extract_data(call, skb, last, buffer,
+ call->count);
+ kunmap_atomic(buffer, KM_USER0);
+ switch (ret) {
+ case 0: break;
+ case -EAGAIN: return 0;
+ default: return ret;
+ }
}
call->offset = 0;
@@ -300,7 +341,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
}
bp = call->buffer;
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
xdr_decode_AFSCallBack(&bp, vnode);
if (call->reply2)
xdr_decode_AFSVolSync(&bp, call->reply2);
@@ -318,6 +359,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call,
if (!last)
return 0;
+ if (call->count < PAGE_SIZE) {
+ _debug("clear");
+ page = call->reply3;
+ buffer = kmap_atomic(page, KM_USER0);
+ memset(buffer + call->count, 0, PAGE_SIZE - call->count);
+ kunmap_atomic(buffer, KM_USER0);
+ }
+
_leave(" = 0 [done]");
return 0;
}
@@ -476,8 +525,8 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->reply2);
- xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+ xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
xdr_decode_AFSCallBack_raw(&bp, call->reply4);
/* xdr_decode_AFSVolSync(&bp, call->replyX); */
@@ -574,7 +623,7 @@ static int afs_deliver_fs_remove(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
/* xdr_decode_AFSVolSync(&bp, call->replyX); */
_leave(" = 0 [done]");
@@ -657,8 +706,8 @@ static int afs_deliver_fs_link(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
- xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
+ xdr_decode_AFSFetchStatus(&bp, &dvnode->status, dvnode, NULL);
/* xdr_decode_AFSVolSync(&bp, call->replyX); */
_leave(" = 0 [done]");
@@ -746,8 +795,8 @@ static int afs_deliver_fs_symlink(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
xdr_decode_AFSFid(&bp, call->reply2);
- xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL);
- xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode);
+ xdr_decode_AFSFetchStatus(&bp, call->reply3, NULL, NULL);
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL);
/* xdr_decode_AFSVolSync(&bp, call->replyX); */
_leave(" = 0 [done]");
@@ -852,9 +901,10 @@ static int afs_deliver_fs_rename(struct afs_call *call,
/* unmarshall the reply once we've received all of it */
bp = call->buffer;
- xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode);
+ xdr_decode_AFSFetchStatus(&bp, &orig_dvnode->status, orig_dvnode, NULL);
if (new_dvnode != orig_dvnode)
- xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode);
+ xdr_decode_AFSFetchStatus(&bp, &new_dvnode->status, new_dvnode,
+ NULL);
/* xdr_decode_AFSVolSync(&bp, call->replyX); */
_leave(" = 0 [done]");
@@ -936,3 +986,262 @@ int afs_fs_rename(struct afs_server *server,
return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
}
+
+/*
+ * deliver reply data to an FS.StoreData
+ */
+static int afs_deliver_fs_store_data(struct afs_call *call,
+ struct sk_buff *skb, bool last)
+{
+ struct afs_vnode *vnode = call->reply;
+ const __be32 *bp;
+
+ _enter(",,%u", last);
+
+ afs_transfer_reply(call, skb);
+ if (!last) {
+ _leave(" = 0 [more]");
+ return 0;
+ }
+
+ if (call->reply_size != call->reply_max) {
+ _leave(" = -EBADMSG [%u != %u]",
+ call->reply_size, call->reply_max);
+ return -EBADMSG;
+ }
+
+ /* unmarshall the reply once we've received all of it */
+ bp = call->buffer;
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode,
+ &call->store_version);
+ /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+ afs_pages_written_back(vnode, call);
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * FS.StoreData operation type
+ */
+static const struct afs_call_type afs_RXFSStoreData = {
+ .name = "FS.StoreData",
+ .deliver = afs_deliver_fs_store_data,
+ .abort_to_error = afs_abort_to_error,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * store a set of pages
+ */
+int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb,
+ pgoff_t first, pgoff_t last,
+ unsigned offset, unsigned to,
+ const struct afs_wait_mode *wait_mode)
+{
+ struct afs_vnode *vnode = wb->vnode;
+ struct afs_call *call;
+ loff_t size, pos, i_size;
+ __be32 *bp;
+
+ _enter(",%x,{%x:%u},,",
+ key_serial(wb->key), vnode->fid.vid, vnode->fid.vnode);
+
+ size = to - offset;
+ if (first != last)
+ size += (loff_t)(last - first) << PAGE_SHIFT;
+ pos = (loff_t)first << PAGE_SHIFT;
+ pos += offset;
+
+ i_size = i_size_read(&vnode->vfs_inode);
+ if (pos + size > i_size)
+ i_size = size + pos;
+
+ _debug("size %llx, at %llx, i_size %llx",
+ (unsigned long long) size, (unsigned long long) pos,
+ (unsigned long long) i_size);
+
+ BUG_ON(i_size > 0xffffffff); // TODO: use 64-bit store
+
+ call = afs_alloc_flat_call(&afs_RXFSStoreData,
+ (4 + 6 + 3) * 4,
+ (21 + 6) * 4);
+ if (!call)
+ return -ENOMEM;
+
+ call->wb = wb;
+ call->key = wb->key;
+ call->reply = vnode;
+ call->service_id = FS_SERVICE;
+ call->port = htons(AFS_FS_PORT);
+ call->mapping = vnode->vfs_inode.i_mapping;
+ call->first = first;
+ call->last = last;
+ call->first_offset = offset;
+ call->last_to = to;
+ call->send_pages = true;
+ call->store_version = vnode->status.data_version + 1;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(FSSTOREDATA);
+ *bp++ = htonl(vnode->fid.vid);
+ *bp++ = htonl(vnode->fid.vnode);
+ *bp++ = htonl(vnode->fid.unique);
+
+ *bp++ = 0; /* mask */
+ *bp++ = 0; /* mtime */
+ *bp++ = 0; /* owner */
+ *bp++ = 0; /* group */
+ *bp++ = 0; /* unix mode */
+ *bp++ = 0; /* segment size */
+
+ *bp++ = htonl(pos);
+ *bp++ = htonl(size);
+ *bp++ = htonl(i_size);
+
+ return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * deliver reply data to an FS.StoreStatus
+ */
+static int afs_deliver_fs_store_status(struct afs_call *call,
+ struct sk_buff *skb, bool last)
+{
+ afs_dataversion_t *store_version;
+ struct afs_vnode *vnode = call->reply;
+ const __be32 *bp;
+
+ _enter(",,%u", last);
+
+ afs_transfer_reply(call, skb);
+ if (!last) {
+ _leave(" = 0 [more]");
+ return 0;
+ }
+
+ if (call->reply_size != call->reply_max) {
+ _leave(" = -EBADMSG [%u != %u]",
+ call->reply_size, call->reply_max);
+ return -EBADMSG;
+ }
+
+ /* unmarshall the reply once we've received all of it */
+ store_version = NULL;
+ if (call->operation_ID == FSSTOREDATA)
+ store_version = &call->store_version;
+
+ bp = call->buffer;
+ xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, store_version);
+ /* xdr_decode_AFSVolSync(&bp, call->replyX); */
+
+ _leave(" = 0 [done]");
+ return 0;
+}
+
+/*
+ * FS.StoreStatus operation type
+ */
+static const struct afs_call_type afs_RXFSStoreStatus = {
+ .name = "FS.StoreStatus",
+ .deliver = afs_deliver_fs_store_status,
+ .abort_to_error = afs_abort_to_error,
+ .destructor = afs_flat_call_destructor,
+};
+
+static const struct afs_call_type afs_RXFSStoreData_as_Status = {
+ .name = "FS.StoreData",
+ .deliver = afs_deliver_fs_store_status,
+ .abort_to_error = afs_abort_to_error,
+ .destructor = afs_flat_call_destructor,
+};
+
+/*
+ * set the attributes on a file, using FS.StoreData rather than FS.StoreStatus
+ * so as to alter the file size also
+ */
+static int afs_fs_setattr_size(struct afs_server *server, struct key *key,
+ struct afs_vnode *vnode, struct iattr *attr,
+ const struct afs_wait_mode *wait_mode)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ _enter(",%x,{%x:%u},,",
+ key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+
+ ASSERT(attr->ia_valid & ATTR_SIZE);
+ ASSERTCMP(attr->ia_size, <=, 0xffffffff); // TODO: use 64-bit store
+
+ call = afs_alloc_flat_call(&afs_RXFSStoreData_as_Status,
+ (4 + 6 + 3) * 4,
+ (21 + 6) * 4);
+ if (!call)
+ return -ENOMEM;
+
+ call->key = key;
+ call->reply = vnode;
+ call->service_id = FS_SERVICE;
+ call->port = htons(AFS_FS_PORT);
+ call->store_version = vnode->status.data_version + 1;
+ call->operation_ID = FSSTOREDATA;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(FSSTOREDATA);
+ *bp++ = htonl(vnode->fid.vid);
+ *bp++ = htonl(vnode->fid.vnode);
+ *bp++ = htonl(vnode->fid.unique);
+
+ xdr_encode_AFS_StoreStatus(&bp, attr);
+
+ *bp++ = 0; /* position of start of write */
+ *bp++ = 0; /* size of write */
+ *bp++ = htonl(attr->ia_size); /* new file length */
+
+ return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
+
+/*
+ * set the attributes on a file, using FS.StoreData if there's a change in file
+ * size, and FS.StoreStatus otherwise
+ */
+int afs_fs_setattr(struct afs_server *server, struct key *key,
+ struct afs_vnode *vnode, struct iattr *attr,
+ const struct afs_wait_mode *wait_mode)
+{
+ struct afs_call *call;
+ __be32 *bp;
+
+ if (attr->ia_valid & ATTR_SIZE)
+ return afs_fs_setattr_size(server, key, vnode, attr,
+ wait_mode);
+
+ _enter(",%x,{%x:%u},,",
+ key_serial(key), vnode->fid.vid, vnode->fid.vnode);
+
+ call = afs_alloc_flat_call(&afs_RXFSStoreStatus,
+ (4 + 6) * 4,
+ (21 + 6) * 4);
+ if (!call)
+ return -ENOMEM;
+
+ call->key = key;
+ call->reply = vnode;
+ call->service_id = FS_SERVICE;
+ call->port = htons(AFS_FS_PORT);
+ call->operation_ID = FSSTORESTATUS;
+
+ /* marshall the parameters */
+ bp = call->request;
+ *bp++ = htonl(FSSTORESTATUS);
+ *bp++ = htonl(vnode->fid.vid);
+ *bp++ = htonl(vnode->fid.vnode);
+ *bp++ = htonl(vnode->fid.unique);
+
+ xdr_encode_AFS_StoreStatus(&bp, attr);
+
+ return afs_make_call(&server->addr, call, GFP_NOFS, wait_mode);
+}
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index c184a4ee599..515a5d12d8f 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -125,7 +125,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key,
struct inode *inode;
int ret;
- _enter(",{%u,%u,%u},,", fid->vid, fid->vnode, fid->unique);
+ _enter(",{%x:%u.%u},,", fid->vid, fid->vnode, fid->unique);
as = sb->s_fs_info;
data.volume = as->volume;
@@ -204,6 +204,19 @@ bad_inode:
}
/*
+ * mark the data attached to an inode as obsolete due to a write on the server
+ * - might also want to ditch all the outstanding writes and dirty pages
+ */
+void afs_zap_data(struct afs_vnode *vnode)
+{
+ _enter("zap data {%x:%u}", vnode->fid.vid, vnode->fid.vnode);
+
+ /* nuke all the non-dirty pages that aren't locked, mapped or being
+ * written back */
+ invalidate_remote_inode(&vnode->vfs_inode);
+}
+
+/*
* validate a vnode/inode
* - there are several things we need to check
* - parent dir data changes (rm, rmdir, rename, mkdir, create, link,
@@ -258,10 +271,8 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
/* if the vnode's data version number changed then its contents are
* different */
- if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) {
- _debug("zap data {%x:%d}", vnode->fid.vid, vnode->fid.vnode);
- invalidate_remote_inode(&vnode->vfs_inode);
- }
+ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags))
+ afs_zap_data(vnode);
clear_bit(AFS_VNODE_MODIFIED, &vnode->flags);
mutex_unlock(&vnode->validate_lock);
@@ -278,7 +289,7 @@ error_unlock:
/*
* read the attributes of an inode
*/
-int afs_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
+int afs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode;
@@ -301,7 +312,7 @@ void afs_clear_inode(struct inode *inode)
vnode = AFS_FS_I(inode);
- _enter("{%x:%d.%d} v=%u x=%u t=%u }",
+ _enter("{%x:%u.%d} v=%u x=%u t=%u }",
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
@@ -323,6 +334,7 @@ void afs_clear_inode(struct inode *inode)
vnode->server = NULL;
}
+ ASSERT(list_empty(&vnode->writebacks));
ASSERT(!vnode->cb_promised);
#ifdef AFS_CACHING_SUPPORT
@@ -339,3 +351,47 @@ void afs_clear_inode(struct inode *inode)
_leave("");
}
+
+/*
+ * set the attributes of an inode
+ */
+int afs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ struct key *key;
+ int ret;
+
+ _enter("{%x:%u},{n=%s},%x",
+ vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+ attr->ia_valid);
+
+ if (!(attr->ia_valid & (ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
+ ATTR_MTIME))) {
+ _leave(" = 0 [unsupported]");
+ return 0;
+ }
+
+ /* flush any dirty data outstanding on a regular file */
+ if (S_ISREG(vnode->vfs_inode.i_mode)) {
+ filemap_write_and_wait(vnode->vfs_inode.i_mapping);
+ afs_writeback_all(vnode);
+ }
+
+ if (attr->ia_valid & ATTR_FILE) {
+ key = attr->ia_file->private_data;
+ } else {
+ key = afs_request_key(vnode->volume->cell);
+ if (IS_ERR(key)) {
+ ret = PTR_ERR(key);
+ goto error;
+ }
+ }
+
+ ret = afs_vnode_setattr(vnode, key, attr);
+ if (!(attr->ia_valid & ATTR_FILE))
+ key_put(key);
+
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index d90c158cd93..a30d4fa768e 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -21,6 +21,7 @@
#define AFS_CELL_MAX_ADDRS 15
+struct pagevec;
struct afs_call;
typedef enum {
@@ -75,12 +76,15 @@ struct afs_call {
struct key *key; /* security for this call */
struct afs_server *server; /* server affected by incoming CM call */
void *request; /* request data (first part) */
- void *request2; /* request data (second part) */
+ struct address_space *mapping; /* page set */
+ struct afs_writeback *wb; /* writeback being performed */
void *buffer; /* reply receive buffer */
void *reply; /* reply buffer (first part) */
void *reply2; /* reply buffer (second part) */
void *reply3; /* reply buffer (third part) */
void *reply4; /* reply buffer (fourth part) */
+ pgoff_t first; /* first page in mapping to deal with */
+ pgoff_t last; /* last page in mapping to deal with */
enum { /* call state */
AFS_CALL_REQUESTING, /* request is being sent for outgoing call */
AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */
@@ -97,14 +101,18 @@ struct afs_call {
unsigned request_size; /* size of request data */
unsigned reply_max; /* maximum size of reply */
unsigned reply_size; /* current size of reply */
+ unsigned first_offset; /* offset into mapping[first] */
+ unsigned last_to; /* amount of mapping[last] */
unsigned short offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
+ bool send_pages; /* T if data from mapping should be sent */
u16 service_id; /* RxRPC service ID to call */
__be16 port; /* target UDP port */
__be32 operation_ID; /* operation ID for an incoming call */
u32 count; /* count for use in unmarshalling */
__be32 tmp; /* place to extract temporary data */
+ afs_dataversion_t store_version; /* updated version expected from store */
};
struct afs_call_type {
@@ -124,6 +132,32 @@ struct afs_call_type {
};
/*
+ * record of an outstanding writeback on a vnode
+ */
+struct afs_writeback {
+ struct list_head link; /* link in vnode->writebacks */
+ struct work_struct writer; /* work item to perform the writeback */
+ struct afs_vnode *vnode; /* vnode to which this write applies */
+ struct key *key; /* owner of this write */
+ wait_queue_head_t waitq; /* completion and ready wait queue */
+ pgoff_t first; /* first page in batch */
+ pgoff_t point; /* last page in current store op */
+ pgoff_t last; /* last page in batch (inclusive) */
+ unsigned offset_first; /* offset into first page of start of write */
+ unsigned to_last; /* offset into last page of end of write */
+ int num_conflicts; /* count of conflicting writes in list */
+ int usage;
+ bool conflicts; /* T if has dependent conflicts */
+ enum {
+ AFS_WBACK_SYNCING, /* synchronisation being performed */
+ AFS_WBACK_PENDING, /* write pending */
+ AFS_WBACK_CONFLICTING, /* conflicting writes posted */
+ AFS_WBACK_WRITING, /* writing back */
+ AFS_WBACK_COMPLETE /* the writeback record has been unlinked */
+ } state __attribute__((packed));
+};
+
+/*
* AFS superblock private data
* - there's one superblock per volume
*/
@@ -305,6 +339,7 @@ struct afs_vnode {
wait_queue_head_t update_waitq; /* status fetch waitqueue */
int update_cnt; /* number of outstanding ops that will update the
* status */
+ spinlock_t writeback_lock; /* lock for writebacks */
spinlock_t lock; /* waitqueue/flags lock */
unsigned long flags;
#define AFS_VNODE_CB_BROKEN 0 /* set if vnode's callback was broken */
@@ -316,6 +351,8 @@ struct afs_vnode {
long acl_order; /* ACL check count (callback break count) */
+ struct list_head writebacks; /* alterations in pagecache that need writing */
+
/* outstanding callback notification on this file */
struct rb_node server_rb; /* link in server->fs_vnodes */
struct rb_node cb_promise; /* link in server->cb_promises */
@@ -433,10 +470,6 @@ extern const struct file_operations afs_file_operations;
extern int afs_open(struct inode *, struct file *);
extern int afs_release(struct inode *, struct file *);
-#ifdef AFS_CACHING_SUPPORT
-extern int afs_cache_get_page_cookie(struct page *, struct cachefs_page **);
-#endif
-
/*
* fsclient.c
*/
@@ -467,6 +500,12 @@ extern int afs_fs_rename(struct afs_server *, struct key *,
struct afs_vnode *, const char *,
struct afs_vnode *, const char *,
const struct afs_wait_mode *);
+extern int afs_fs_store_data(struct afs_server *, struct afs_writeback *,
+ pgoff_t, pgoff_t, unsigned, unsigned,
+ const struct afs_wait_mode *);
+extern int afs_fs_setattr(struct afs_server *, struct key *,
+ struct afs_vnode *, struct iattr *,
+ const struct afs_wait_mode *);
/*
* inode.c
@@ -474,10 +513,10 @@ extern int afs_fs_rename(struct afs_server *, struct key *,
extern struct inode *afs_iget(struct super_block *, struct key *,
struct afs_fid *, struct afs_file_status *,
struct afs_callback *);
+extern void afs_zap_data(struct afs_vnode *);
extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_inode_getattr(struct vfsmount *, struct dentry *,
- struct kstat *);
-extern void afs_zap_permits(struct rcu_head *);
+extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+extern int afs_setattr(struct dentry *, struct iattr *);
extern void afs_clear_inode(struct inode *);
/*
@@ -533,6 +572,7 @@ extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *,
*/
extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
+extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
extern int afs_permission(struct inode *, int, struct nameidata *);
@@ -629,6 +669,9 @@ extern int afs_vnode_symlink(struct afs_vnode *, struct key *, const char *,
struct afs_file_status *, struct afs_server **);
extern int afs_vnode_rename(struct afs_vnode *, struct afs_vnode *,
struct key *, const char *, const char *);
+extern int afs_vnode_store_data(struct afs_writeback *, pgoff_t, pgoff_t,
+ unsigned, unsigned);
+extern int afs_vnode_setattr(struct afs_vnode *, struct key *, struct iattr *);
/*
* volume.c
@@ -645,6 +688,23 @@ extern struct afs_server *afs_volume_pick_fileserver(struct afs_vnode *);
extern int afs_volume_release_fileserver(struct afs_vnode *,
struct afs_server *, int);
+/*
+ * write.c
+ */
+extern int afs_set_page_dirty(struct page *);
+extern void afs_put_writeback(struct afs_writeback *);
+extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned);
+extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned);
+extern int afs_writepage(struct page *, struct writeback_control *);
+extern int afs_writepages(struct address_space *, struct writeback_control *);
+extern int afs_write_inode(struct inode *, int);
+extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
+extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+extern int afs_writeback_all(struct afs_vnode *);
+extern int afs_fsync(struct file *, struct dentry *, int);
+
+
/*****************************************************************************/
/*
* debug tracing
@@ -726,6 +786,21 @@ do { \
} \
} while(0)
+#define ASSERTRANGE(L, OP1, N, OP2, H) \
+do { \
+ if (unlikely(!((L) OP1 (N)) || !((N) OP2 (H)))) { \
+ printk(KERN_ERR "\n"); \
+ printk(KERN_ERR "AFS: Assertion failed\n"); \
+ printk(KERN_ERR "%lu "#OP1" %lu "#OP2" %lu is false\n", \
+ (unsigned long)(L), (unsigned long)(N), \
+ (unsigned long)(H)); \
+ printk(KERN_ERR "0x%lx "#OP1" 0x%lx "#OP2" 0x%lx is false\n", \
+ (unsigned long)(L), (unsigned long)(N), \
+ (unsigned long)(H)); \
+ BUG(); \
+ } \
+} while(0)
+
#define ASSERTIF(C, X) \
do { \
if (unlikely((C) && !(X))) { \
@@ -758,6 +833,10 @@ do { \
do { \
} while(0)
+#define ASSERTRANGE(L, OP1, N, OP2, H) \
+do { \
+} while(0)
+
#define ASSERTIF(C, X) \
do { \
} while(0)
diff --git a/fs/afs/main.c b/fs/afs/main.c
index 80ec6fd19a7..f1f71ff7d5c 100644
--- a/fs/afs/main.c
+++ b/fs/afs/main.c
@@ -149,6 +149,7 @@ error_cache:
afs_vlocation_purge();
afs_cell_purge();
afs_proc_cleanup();
+ rcu_barrier();
printk(KERN_ERR "kAFS: failed to register: %d\n", ret);
return ret;
}
@@ -176,6 +177,7 @@ static void __exit afs_exit(void)
cachefs_unregister_netfs(&afs_cache_netfs);
#endif
afs_proc_cleanup();
+ rcu_barrier();
}
module_exit(afs_exit);
diff --git a/fs/afs/misc.c b/fs/afs/misc.c
index cdb9792d816..d1a889c4074 100644
--- a/fs/afs/misc.c
+++ b/fs/afs/misc.c
@@ -22,6 +22,7 @@ int afs_abort_to_error(u32 abort_code)
{
switch (abort_code) {
case 13: return -EACCES;
+ case 27: return -EFBIG;
case 30: return -EROFS;
case VSALVAGE: return -EIO;
case VNOVNODE: return -ENOENT;
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 034fcfd4e33..a3684dcc76e 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -36,7 +36,7 @@ const struct inode_operations afs_mntpt_inode_operations = {
.lookup = afs_mntpt_lookup,
.follow_link = afs_mntpt_follow_link,
.readlink = page_readlink,
- .getattr = afs_inode_getattr,
+ .getattr = afs_getattr,
};
static LIST_HEAD(afs_vfsmounts);
@@ -58,7 +58,8 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
char *buf;
int ret;
- _enter("{%u,%u}", vnode->fid.vnode, vnode->fid.unique);
+ _enter("{%x:%u,%u}",
+ vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
/* read the contents of the symlink into the pagecache */
page = read_mapping_page(AFS_VNODE_TO_I(vnode)->i_mapping, 0, &file);
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 222c1a3abbb..04189c47d6a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -237,6 +237,70 @@ void afs_flat_call_destructor(struct afs_call *call)
}
/*
+ * attach the data from a bunch of pages on an inode to a call
+ */
+int afs_send_pages(struct afs_call *call, struct msghdr *msg, struct kvec *iov)
+{
+ struct page *pages[8];
+ unsigned count, n, loop, offset, to;
+ pgoff_t first = call->first, last = call->last;
+ int ret;
+
+ _enter("");
+
+ offset = call->first_offset;
+ call->first_offset = 0;
+
+ do {
+ _debug("attach %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > ARRAY_SIZE(pages))
+ count = ARRAY_SIZE(pages);
+ n = find_get_pages_contig(call->mapping, first, count, pages);
+ ASSERTCMP(n, ==, count);
+
+ loop = 0;
+ do {
+ msg->msg_flags = 0;
+ to = PAGE_SIZE;
+ if (first + loop >= last)
+ to = call->last_to;
+ else
+ msg->msg_flags = MSG_MORE;
+ iov->iov_base = kmap(pages[loop]) + offset;
+ iov->iov_len = to - offset;
+ offset = 0;
+
+ _debug("- range %u-%u%s",
+ offset, to, msg->msg_flags ? " [more]" : "");
+ msg->msg_iov = (struct iovec *) iov;
+ msg->msg_iovlen = 1;
+
+ /* have to change the state *before* sending the last
+ * packet as RxRPC might give us the reply before it
+ * returns from sending the request */
+ if (first + loop >= last)
+ call->state = AFS_CALL_AWAIT_REPLY;
+ ret = rxrpc_kernel_send_data(call->rxcall, msg,
+ to - offset);
+ kunmap(pages[loop]);
+ if (ret < 0)
+ break;
+ } while (++loop < count);
+ first += count;
+
+ for (loop = 0; loop < count; loop++)
+ put_page(pages[loop]);
+ if (ret < 0)
+ break;
+ } while (first < last);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* initiate a call
*/
int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
@@ -253,8 +317,9 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
ASSERT(call->type != NULL);
ASSERT(call->type->name != NULL);
- _debug("MAKE %p{%s} [%d]",
- call, call->type->name, atomic_read(&afs_outstanding_calls));
+ _debug("____MAKE %p{%s,%x} [%d]____",
+ call, call->type->name, key_serial(call->key),
+ atomic_read(&afs_outstanding_calls));
call->wait_mode = wait_mode;
INIT_WORK(&call->async_work, afs_process_async_call);
@@ -289,16 +354,23 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
msg.msg_iovlen = 1;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ msg.msg_flags = (call->send_pages ? MSG_MORE : 0);
/* have to change the state *before* sending the last packet as RxRPC
* might give us the reply before it returns from sending the
* request */
- call->state = AFS_CALL_AWAIT_REPLY;
+ if (!call->send_pages)
+ call->state = AFS_CALL_AWAIT_REPLY;
ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size);
if (ret < 0)
goto error_do_abort;
+ if (call->send_pages) {
+ ret = afs_send_pages(call, &msg, iov);
+ if (ret < 0)
+ goto error_do_abort;
+ }
+
/* at this point, an async call may no longer exist as it may have
* already completed */
return wait_mode->wait(call);
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f9f424d8045..e0ea88b63eb 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -109,7 +109,7 @@ void afs_clear_permits(struct afs_vnode *vnode)
{
struct afs_permits *permits;
- _enter("{%x}", vnode->fid.vnode);
+ _enter("{%x:%u}", vnode->fid.vid, vnode->fid.vnode);
mutex_lock(&vnode->permits_lock);
permits = vnode->permits;
@@ -132,7 +132,8 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, long acl_order)
struct afs_vnode *auth_vnode;
int count, loop;
- _enter("{%x},%x,%lx", vnode->fid.vnode, key_serial(key), acl_order);
+ _enter("{%x:%u},%x,%lx",
+ vnode->fid.vid, vnode->fid.vnode, key_serial(key), acl_order);
auth_vnode = afs_get_auth_inode(vnode, key);
if (IS_ERR(auth_vnode)) {
@@ -220,7 +221,8 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
bool valid;
int loop, ret;
- _enter("");
+ _enter("{%x:%u},%x",
+ vnode->fid.vid, vnode->fid.vnode, key_serial(key));
auth_vnode = afs_get_auth_inode(vnode, key);
if (IS_ERR(auth_vnode)) {
@@ -268,9 +270,9 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
_leave(" = %d", ret);
return ret;
}
+ *_access = vnode->status.caller_access;
}
- *_access = vnode->status.caller_access;
iput(&auth_vnode->vfs_inode);
_leave(" = 0 [access %x]", *_access);
return 0;
@@ -288,7 +290,7 @@ int afs_permission(struct inode *inode, int mask, struct nameidata *nd)
struct key *key;
int ret;
- _enter("{{%x:%x},%lx},%x,",
+ _enter("{{%x:%u},%lx},%x,",
vnode->fid.vid, vnode->fid.vnode, vnode->flags, mask);
key = afs_request_key(vnode->volume->cell);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 96bb23b476a..231ae415027 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -252,6 +252,9 @@ static void afs_destroy_server(struct afs_server *server)
{
_enter("%p", server);
+ ASSERTIF(server->cb_break_head != server->cb_break_tail,
+ delayed_work_pending(&server->cb_break_work));
+
ASSERTCMP(server->fs_vnodes.rb_node, ==, NULL);
ASSERTCMP(server->cb_promises.rb_node, ==, NULL);
ASSERTCMP(server->cb_break_head, ==, server->cb_break_tail);
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 7030d76155f..d24be334b60 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -50,6 +50,7 @@ static const struct super_operations afs_super_ops = {
.statfs = simple_statfs,
.alloc_inode = afs_alloc_inode,
.drop_inode = generic_delete_inode,
+ .write_inode = afs_write_inode,
.destroy_inode = afs_destroy_inode,
.clear_inode = afs_clear_inode,
.umount_begin = afs_umount_begin,
@@ -66,7 +67,7 @@ enum {
afs_opt_vol,
};
-static const match_table_t afs_options_list = {
+static match_table_t afs_options_list = {
{ afs_opt_cell, "cell=%s" },
{ afs_opt_rwpath, "rwpath" },
{ afs_opt_vol, "vol=%s" },
@@ -459,7 +460,9 @@ static void afs_i_init_once(void *_vnode, struct kmem_cache *cachep,
init_waitqueue_head(&vnode->update_waitq);
mutex_init(&vnode->permits_lock);
mutex_init(&vnode->validate_lock);
+ spin_lock_init(&vnode->writeback_lock);
spin_lock_init(&vnode->lock);
+ INIT_LIST_HEAD(&vnode->writebacks);
INIT_WORK(&vnode->cb_broken_work, afs_broken_callback_work);
}
}
diff --git a/fs/afs/vnode.c b/fs/afs/vnode.c
index a1904ab8426..ec814660209 100644
--- a/fs/afs/vnode.c
+++ b/fs/afs/vnode.c
@@ -261,7 +261,7 @@ int afs_vnode_fetch_status(struct afs_vnode *vnode,
DECLARE_WAITQUEUE(myself, current);
- _enter("%s,{%u,%u,%u}",
+ _enter("%s,{%x:%u.%u}",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);
@@ -389,7 +389,7 @@ int afs_vnode_fetch_data(struct afs_vnode *vnode, struct key *key,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%x,,,",
+ _enter("%s{%x:%u.%u},%x,,,",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -446,7 +446,7 @@ int afs_vnode_create(struct afs_vnode *vnode, struct key *key,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%x,%s,,",
+ _enter("%s{%x:%u.%u},%x,%s,,",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -502,7 +502,7 @@ int afs_vnode_remove(struct afs_vnode *vnode, struct key *key, const char *name,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%x,%s",
+ _enter("%s{%x:%u.%u},%x,%s",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -557,7 +557,7 @@ extern int afs_vnode_link(struct afs_vnode *dvnode, struct afs_vnode *vnode,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s",
+ _enter("%s{%x:%u.%u},%s{%x:%u.%u},%x,%s",
dvnode->volume->vlocation->vldb.name,
dvnode->fid.vid,
dvnode->fid.vnode,
@@ -628,7 +628,7 @@ int afs_vnode_symlink(struct afs_vnode *vnode, struct key *key,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%x,%s,%s,,,",
+ _enter("%s{%x:%u.%u},%x,%s,%s,,,",
vnode->volume->vlocation->vldb.name,
vnode->fid.vid,
vnode->fid.vnode,
@@ -687,7 +687,7 @@ int afs_vnode_rename(struct afs_vnode *orig_dvnode,
struct afs_server *server;
int ret;
- _enter("%s{%u,%u,%u},%s{%u,%u,%u},%x,%s,%s",
+ _enter("%s{%x:%u.%u},%s{%u,%u,%u},%x,%s,%s",
orig_dvnode->volume->vlocation->vldb.name,
orig_dvnode->fid.vid,
orig_dvnode->fid.vnode,
@@ -753,3 +753,110 @@ no_server:
_leave(" = %ld [cnt %d]", PTR_ERR(server), orig_dvnode->update_cnt);
return PTR_ERR(server);
}
+
+/*
+ * write to a file
+ */
+int afs_vnode_store_data(struct afs_writeback *wb, pgoff_t first, pgoff_t last,
+ unsigned offset, unsigned to)
+{
+ struct afs_server *server;
+ struct afs_vnode *vnode = wb->vnode;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x,%lx,%lx,%x,%x",
+ vnode->volume->vlocation->vldb.name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(wb->key),
+ first, last, offset, to);
+
+ /* this op will fetch the status */
+ spin_lock(&vnode->lock);
+ vnode->update_cnt++;
+ spin_unlock(&vnode->lock);
+
+ do {
+ /* pick a server to query */
+ server = afs_volume_pick_fileserver(vnode);
+ if (IS_ERR(server))
+ goto no_server;
+
+ _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+ ret = afs_fs_store_data(server, wb, first, last, offset, to,
+ &afs_sync_call);
+
+ } while (!afs_volume_release_fileserver(vnode, server, ret));
+
+ /* adjust the flags */
+ if (ret == 0) {
+ afs_vnode_finalise_status_update(vnode, server);
+ afs_put_server(server);
+ } else {
+ afs_vnode_status_update_failed(vnode, ret);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+
+no_server:
+ spin_lock(&vnode->lock);
+ vnode->update_cnt--;
+ ASSERTCMP(vnode->update_cnt, >=, 0);
+ spin_unlock(&vnode->lock);
+ return PTR_ERR(server);
+}
+
+/*
+ * set the attributes on a file
+ */
+int afs_vnode_setattr(struct afs_vnode *vnode, struct key *key,
+ struct iattr *attr)
+{
+ struct afs_server *server;
+ int ret;
+
+ _enter("%s{%x:%u.%u},%x",
+ vnode->volume->vlocation->vldb.name,
+ vnode->fid.vid,
+ vnode->fid.vnode,
+ vnode->fid.unique,
+ key_serial(key));
+
+ /* this op will fetch the status */
+ spin_lock(&vnode->lock);
+ vnode->update_cnt++;
+ spin_unlock(&vnode->lock);
+
+ do {
+ /* pick a server to query */
+ server = afs_volume_pick_fileserver(vnode);
+ if (IS_ERR(server))
+ goto no_server;
+
+ _debug("USING SERVER: %08x\n", ntohl(server->addr.s_addr));
+
+ ret = afs_fs_setattr(server, key, vnode, attr, &afs_sync_call);
+
+ } while (!afs_volume_release_fileserver(vnode, server, ret));
+
+ /* adjust the flags */
+ if (ret == 0) {
+ afs_vnode_finalise_status_update(vnode, server);
+ afs_put_server(server);
+ } else {
+ afs_vnode_status_update_failed(vnode, ret);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+
+no_server:
+ spin_lock(&vnode->lock);
+ vnode->update_cnt--;
+ ASSERTCMP(vnode->update_cnt, >=, 0);
+ spin_unlock(&vnode->lock);
+ return PTR_ERR(server);
+}
diff --git a/fs/afs/write.c b/fs/afs/write.c
new file mode 100644
index 00000000000..83ff2926281
--- /dev/null
+++ b/fs/afs/write.c
@@ -0,0 +1,835 @@
+/* handling of writes to regular files and writing back to the server
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+static int afs_write_back_from_locked_page(struct afs_writeback *wb,
+ struct page *page);
+
+/*
+ * mark a page as having been made dirty and thus needing writeback
+ */
+int afs_set_page_dirty(struct page *page)
+{
+ _enter("");
+ return __set_page_dirty_nobuffers(page);
+}
+
+/*
+ * unlink a writeback record because its usage has reached zero
+ * - must be called with the wb->vnode->writeback_lock held
+ */
+static void afs_unlink_writeback(struct afs_writeback *wb)
+{
+ struct afs_writeback *front;
+ struct afs_vnode *vnode = wb->vnode;
+
+ list_del_init(&wb->link);
+ if (!list_empty(&vnode->writebacks)) {
+ /* if an fsync rises to the front of the queue then wake it
+ * up */
+ front = list_entry(vnode->writebacks.next,
+ struct afs_writeback, link);
+ if (front->state == AFS_WBACK_SYNCING) {
+ _debug("wake up sync");
+ front->state = AFS_WBACK_COMPLETE;
+ wake_up(&front->waitq);
+ }
+ }
+}
+
+/*
+ * free a writeback record
+ */
+static void afs_free_writeback(struct afs_writeback *wb)
+{
+ _enter("");
+ key_put(wb->key);
+ kfree(wb);
+}
+
+/*
+ * dispose of a reference to a writeback record
+ */
+void afs_put_writeback(struct afs_writeback *wb)
+{
+ struct afs_vnode *vnode = wb->vnode;
+
+ _enter("{%d}", wb->usage);
+
+ spin_lock(&vnode->writeback_lock);
+ if (--wb->usage == 0)
+ afs_unlink_writeback(wb);
+ else
+ wb = NULL;
+ spin_unlock(&vnode->writeback_lock);
+ if (wb)
+ afs_free_writeback(wb);
+}
+
+/*
+ * partly or wholly fill a page that's under preparation for writing
+ */
+static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
+ unsigned start, unsigned len, struct page *page)
+{
+ int ret;
+
+ _enter(",,%u,%u", start, len);
+
+ ASSERTCMP(start + len, <=, PAGE_SIZE);
+
+ ret = afs_vnode_fetch_data(vnode, key, start, len, page);
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ _debug("got NOENT from server"
+ " - marking file deleted and stale");
+ set_bit(AFS_VNODE_DELETED, &vnode->flags);
+ ret = -ESTALE;
+ }
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * prepare a page for being written to
+ */
+static int afs_prepare_page(struct afs_vnode *vnode, struct page *page,
+ struct key *key, unsigned offset, unsigned to)
+{
+ unsigned eof, tail, start, stop, len;
+ loff_t i_size, pos;
+ void *p;
+ int ret;
+
+ _enter("");
+
+ if (offset == 0 && to == PAGE_SIZE)
+ return 0;
+
+ p = kmap(page);
+
+ i_size = i_size_read(&vnode->vfs_inode);
+ pos = (loff_t) page->index << PAGE_SHIFT;
+ if (pos >= i_size) {
+ /* partial write, page beyond EOF */
+ _debug("beyond");
+ if (offset > 0)
+ memset(p, 0, offset);
+ if (to < PAGE_SIZE)
+ memset(p + to, 0, PAGE_SIZE - to);
+ kunmap(page);
+ return 0;
+ }
+
+ if (i_size - pos >= PAGE_SIZE) {
+ /* partial write, page entirely before EOF */
+ _debug("before");
+ tail = eof = PAGE_SIZE;
+ } else {
+ /* partial write, page overlaps EOF */
+ eof = i_size - pos;
+ _debug("overlap %u", eof);
+ tail = max(eof, to);
+ if (tail < PAGE_SIZE)
+ memset(p + tail, 0, PAGE_SIZE - tail);
+ if (offset > eof)
+ memset(p + eof, 0, PAGE_SIZE - eof);
+ }
+
+ kunmap(p);
+
+ ret = 0;
+ if (offset > 0 || eof > to) {
+ /* need to fill one or two bits that aren't going to be written
+ * (cover both fillers in one read if there are two) */
+ start = (offset > 0) ? 0 : to;
+ stop = (eof > to) ? eof : offset;
+ len = stop - start;
+ _debug("wr=%u-%u av=0-%u rd=%u@%u",
+ offset, to, eof, start, len);
+ ret = afs_fill_page(vnode, key, start, len, page);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * prepare to perform part of a write to a page
+ * - the caller holds the page locked, preventing it from being written out or
+ * modified by anyone else
+ */
+int afs_prepare_write(struct file *file, struct page *page,
+ unsigned offset, unsigned to)
+{
+ struct afs_writeback *candidate, *wb;
+ struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+ struct key *key = file->private_data;
+ pgoff_t index;
+ int ret;
+
+ _enter("{%x:%u},{%lx},%u,%u",
+ vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
+
+ candidate = kzalloc(sizeof(*candidate), GFP_KERNEL);
+ if (!candidate)
+ return -ENOMEM;
+ candidate->vnode = vnode;
+ candidate->first = candidate->last = page->index;
+ candidate->offset_first = offset;
+ candidate->to_last = to;
+ candidate->usage = 1;
+ candidate->state = AFS_WBACK_PENDING;
+ init_waitqueue_head(&candidate->waitq);
+
+ if (!PageUptodate(page)) {
+ _debug("not up to date");
+ ret = afs_prepare_page(vnode, page, key, offset, to);
+ if (ret < 0) {
+ kfree(candidate);
+ _leave(" = %d [prep]", ret);
+ return ret;
+ }
+ SetPageUptodate(page);
+ }
+
+try_again:
+ index = page->index;
+ spin_lock(&vnode->writeback_lock);
+
+ /* see if this page is already pending a writeback under a suitable key
+ * - if so we can just join onto that one */
+ wb = (struct afs_writeback *) page_private(page);
+ if (wb) {
+ if (wb->key == key && wb->state == AFS_WBACK_PENDING)
+ goto subsume_in_current_wb;
+ goto flush_conflicting_wb;
+ }
+
+ if (index > 0) {
+ /* see if we can find an already pending writeback that we can
+ * append this page to */
+ list_for_each_entry(wb, &vnode->writebacks, link) {
+ if (wb->last == index - 1 && wb->key == key &&
+ wb->state == AFS_WBACK_PENDING)
+ goto append_to_previous_wb;
+ }
+ }
+
+ list_add_tail(&candidate->link, &vnode->writebacks);
+ candidate->key = key_get(key);
+ spin_unlock(&vnode->writeback_lock);
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long) candidate);
+ _leave(" = 0 [new]");
+ return 0;
+
+subsume_in_current_wb:
+ _debug("subsume");
+ ASSERTRANGE(wb->first, <=, index, <=, wb->last);
+ if (index == wb->first && offset < wb->offset_first)
+ wb->offset_first = offset;
+ if (index == wb->last && to > wb->to_last)
+ wb->to_last = to;
+ spin_unlock(&vnode->writeback_lock);
+ kfree(candidate);
+ _leave(" = 0 [sub]");
+ return 0;
+
+append_to_previous_wb:
+ _debug("append into %lx-%lx", wb->first, wb->last);
+ wb->usage++;
+ wb->last++;
+ wb->to_last = to;
+ spin_unlock(&vnode->writeback_lock);
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long) wb);
+ kfree(candidate);
+ _leave(" = 0 [app]");
+ return 0;
+
+ /* the page is currently bound to another context, so if it's dirty we
+ * need to flush it before we can use the new context */
+flush_conflicting_wb:
+ _debug("flush conflict");
+ if (wb->state == AFS_WBACK_PENDING)
+ wb->state = AFS_WBACK_CONFLICTING;
+ spin_unlock(&vnode->writeback_lock);
+ if (PageDirty(page)) {
+ ret = afs_write_back_from_locked_page(wb, page);
+ if (ret < 0) {
+ afs_put_writeback(candidate);
+ _leave(" = %d", ret);
+ return ret;
+ }
+ }
+
+ /* the page holds a ref on the writeback record */
+ afs_put_writeback(wb);
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ goto try_again;
+}
+
+/*
+ * finalise part of a write to a page
+ */
+int afs_commit_write(struct file *file, struct page *page,
+ unsigned offset, unsigned to)
+{
+ struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode);
+ loff_t i_size, maybe_i_size;
+
+ _enter("{%x:%u},{%lx},%u,%u",
+ vnode->fid.vid, vnode->fid.vnode, page->index, offset, to);
+
+ maybe_i_size = (loff_t) page->index << PAGE_SHIFT;
+ maybe_i_size += to;
+
+ i_size = i_size_read(&vnode->vfs_inode);
+ if (maybe_i_size > i_size) {
+ spin_lock(&vnode->writeback_lock);
+ i_size = i_size_read(&vnode->vfs_inode);
+ if (maybe_i_size > i_size)
+ i_size_write(&vnode->vfs_inode, maybe_i_size);
+ spin_unlock(&vnode->writeback_lock);
+ }
+
+ set_page_dirty(page);
+
+ if (PageDirty(page))
+ _debug("dirtied");
+
+ return 0;
+}
+
+/*
+ * kill all the pages in the given range
+ */
+static void afs_kill_pages(struct afs_vnode *vnode, bool error,
+ pgoff_t first, pgoff_t last)
+{
+ struct pagevec pv;
+ unsigned count, loop;
+
+ _enter("{%x:%u},%lx-%lx",
+ vnode->fid.vid, vnode->fid.vnode, first, last);
+
+ pagevec_init(&pv, 0);
+
+ do {
+ _debug("kill %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > PAGEVEC_SIZE)
+ count = PAGEVEC_SIZE;
+ pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping,
+ first, count, pv.pages);
+ ASSERTCMP(pv.nr, ==, count);
+
+ for (loop = 0; loop < count; loop++) {
+ ClearPageUptodate(pv.pages[loop]);
+ if (error)
+ SetPageError(pv.pages[loop]);
+ end_page_writeback(pv.pages[loop]);
+ }
+
+ __pagevec_release(&pv);
+ } while (first < last);
+
+ _leave("");
+}
+
+/*
+ * synchronously write back the locked page and any subsequent non-locked dirty
+ * pages also covered by the same writeback record
+ */
+static int afs_write_back_from_locked_page(struct afs_writeback *wb,
+ struct page *primary_page)
+{
+ struct page *pages[8], *page;
+ unsigned long count;
+ unsigned n, offset, to;
+ pgoff_t start, first, last;
+ int loop, ret;
+
+ _enter(",%lx", primary_page->index);
+
+ count = 1;
+ if (!clear_page_dirty_for_io(primary_page))
+ BUG();
+ if (test_set_page_writeback(primary_page))
+ BUG();
+
+ /* find all consecutive lockable dirty pages, stopping when we find a
+ * page that is not immediately lockable, is not dirty or is missing,
+ * or we reach the end of the range */
+ start = primary_page->index;
+ if (start >= wb->last)
+ goto no_more;
+ start++;
+ do {
+ _debug("more %lx [%lx]", start, count);
+ n = wb->last - start + 1;
+ if (n > ARRAY_SIZE(pages))
+ n = ARRAY_SIZE(pages);
+ n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping,
+ start, n, pages);
+ _debug("fgpc %u", n);
+ if (n == 0)
+ goto no_more;
+ if (pages[0]->index != start) {
+ for (n--; n >= 0; n--)
+ put_page(pages[n]);
+ goto no_more;
+ }
+
+ for (loop = 0; loop < n; loop++) {
+ page = pages[loop];
+ if (page->index > wb->last)
+ break;
+ if (TestSetPageLocked(page))
+ break;
+ if (!PageDirty(page) ||
+ page_private(page) != (unsigned long) wb) {
+ unlock_page(page);
+ break;
+ }
+ if (!clear_page_dirty_for_io(page))
+ BUG();
+ if (test_set_page_writeback(page))
+ BUG();
+ unlock_page(page);
+ put_page(page);
+ }
+ count += loop;
+ if (loop < n) {
+ for (; loop < n; loop++)
+ put_page(pages[loop]);
+ goto no_more;
+ }
+
+ start += loop;
+ } while (start <= wb->last && count < 65536);
+
+no_more:
+ /* we now have a contiguous set of dirty pages, each with writeback set
+ * and the dirty mark cleared; the first page is locked and must remain
+ * so, all the rest are unlocked */
+ first = primary_page->index;
+ last = first + count - 1;
+
+ offset = (first == wb->first) ? wb->offset_first : 0;
+ to = (last == wb->last) ? wb->to_last : PAGE_SIZE;
+
+ _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to);
+
+ ret = afs_vnode_store_data(wb, first, last, offset, to);
+ if (ret < 0) {
+ switch (ret) {
+ case -EDQUOT:
+ case -ENOSPC:
+ set_bit(AS_ENOSPC,
+ &wb->vnode->vfs_inode.i_mapping->flags);
+ break;
+ case -EROFS:
+ case -EIO:
+ case -EREMOTEIO:
+ case -EFBIG:
+ case -ENOENT:
+ case -ENOMEDIUM:
+ case -ENXIO:
+ afs_kill_pages(wb->vnode, true, first, last);
+ set_bit(AS_EIO, &wb->vnode->vfs_inode.i_mapping->flags);
+ break;
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ afs_kill_pages(wb->vnode, false, first, last);
+ break;
+ default:
+ break;
+ }
+ } else {
+ ret = count;
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * write a page back to the server
+ * - the caller locked the page for us
+ */
+int afs_writepage(struct page *page, struct writeback_control *wbc)
+{
+ struct backing_dev_info *bdi = page->mapping->backing_dev_info;
+ struct afs_writeback *wb;
+ int ret;
+
+ _enter("{%lx},", page->index);
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) || !PageDirty(page)) {
+ unlock_page(page);
+ return 0;
+ }
+
+ wb = (struct afs_writeback *) page_private(page);
+ ASSERT(wb != NULL);
+
+ ret = afs_write_back_from_locked_page(wb, page);
+ unlock_page(page);
+ if (ret < 0) {
+ _leave(" = %d", ret);
+ return 0;
+ }
+
+ wbc->nr_to_write -= ret;
+ if (wbc->nonblocking && bdi_write_congested(bdi))
+ wbc->encountered_congestion = 1;
+
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * write a region of pages back to the server
+ */
+int afs_writepages_region(struct address_space *mapping,
+ struct writeback_control *wbc,
+ pgoff_t index, pgoff_t end, pgoff_t *_next)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ struct afs_writeback *wb;
+ struct page *page;
+ int ret, n;
+
+ _enter(",,%lx,%lx,", index, end);
+
+ do {
+ n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY,
+ 1, &page);
+ if (!n)
+ break;
+
+ _debug("wback %lx", page->index);
+
+ if (page->index > end) {
+ *_next = index;
+ page_cache_release(page);
+ _leave(" = 0 [%lx]", *_next);
+ return 0;
+ }
+
+ /* at this point we hold neither mapping->tree_lock nor lock on
+ * the page itself: the page may be truncated or invalidated
+ * (changing page->mapping to NULL), or even swizzled back from
+ * swapper_space to tmpfs file mapping
+ */
+ lock_page(page);
+
+ if (page->mapping != mapping) {
+ unlock_page(page);
+ page_cache_release(page);
+ continue;
+ }
+
+ if (wbc->sync_mode != WB_SYNC_NONE)
+ wait_on_page_writeback(page);
+
+ if (PageWriteback(page) || !PageDirty(page)) {
+ unlock_page(page);
+ continue;
+ }
+
+ wb = (struct afs_writeback *) page_private(page);
+ ASSERT(wb != NULL);
+
+ spin_lock(&wb->vnode->writeback_lock);
+ wb->state = AFS_WBACK_WRITING;
+ spin_unlock(&wb->vnode->writeback_lock);
+
+ ret = afs_write_back_from_locked_page(wb, page);
+ unlock_page(page);
+ page_cache_release(page);
+ if (ret < 0) {
+ _leave(" = %d", ret);
+ return ret;
+ }
+
+ wbc->nr_to_write -= ret;
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ break;
+ }
+
+ cond_resched();
+ } while (index < end && wbc->nr_to_write > 0);
+
+ *_next = index;
+ _leave(" = 0 [%lx]", *_next);
+ return 0;
+}
+
+/*
+ * write some of the pending data back to the server
+ */
+int afs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct backing_dev_info *bdi = mapping->backing_dev_info;
+ pgoff_t start, end, next;
+ int ret;
+
+ _enter("");
+
+ if (wbc->nonblocking && bdi_write_congested(bdi)) {
+ wbc->encountered_congestion = 1;
+ _leave(" = 0 [congest]");
+ return 0;
+ }
+
+ if (wbc->range_cyclic) {
+ start = mapping->writeback_index;
+ end = -1;
+ ret = afs_writepages_region(mapping, wbc, start, end, &next);
+ if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
+ !(wbc->nonblocking && wbc->encountered_congestion))
+ ret = afs_writepages_region(mapping, wbc, 0, start,
+ &next);
+ mapping->writeback_index = next;
+ } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
+ end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
+ ret = afs_writepages_region(mapping, wbc, 0, end, &next);
+ if (wbc->nr_to_write > 0)
+ mapping->writeback_index = next;
+ } else {
+ start = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ ret = afs_writepages_region(mapping, wbc, start, end, &next);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * write an inode back
+ */
+int afs_write_inode(struct inode *inode, int sync)
+{
+ struct afs_vnode *vnode = AFS_FS_I(inode);
+ int ret;
+
+ _enter("{%x:%u},", vnode->fid.vid, vnode->fid.vnode);
+
+ ret = 0;
+ if (sync) {
+ ret = filemap_fdatawait(inode->i_mapping);
+ if (ret < 0)
+ __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+ }
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * completion of write to server
+ */
+void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call)
+{
+ struct afs_writeback *wb = call->wb;
+ struct pagevec pv;
+ unsigned count, loop;
+ pgoff_t first = call->first, last = call->last;
+ bool free_wb;
+
+ _enter("{%x:%u},{%lx-%lx}",
+ vnode->fid.vid, vnode->fid.vnode, first, last);
+
+ ASSERT(wb != NULL);
+
+ pagevec_init(&pv, 0);
+
+ do {
+ _debug("attach %lx-%lx", first, last);
+
+ count = last - first + 1;
+ if (count > PAGEVEC_SIZE)
+ count = PAGEVEC_SIZE;
+ pv.nr = find_get_pages_contig(call->mapping, first, count,
+ pv.pages);
+ ASSERTCMP(pv.nr, ==, count);
+
+ spin_lock(&vnode->writeback_lock);
+ for (loop = 0; loop < count; loop++) {
+ struct page *page = pv.pages[loop];
+ end_page_writeback(page);
+ if (page_private(page) == (unsigned long) wb) {
+ set_page_private(page, 0);
+ ClearPagePrivate(page);
+ wb->usage--;
+ }
+ }
+ free_wb = false;
+ if (wb->usage == 0) {
+ afs_unlink_writeback(wb);
+ free_wb = true;
+ }
+ spin_unlock(&vnode->writeback_lock);
+ first += count;
+ if (free_wb) {
+ afs_free_writeback(wb);
+ wb = NULL;
+ }
+
+ __pagevec_release(&pv);
+ } while (first < last);
+
+ _leave("");
+}
+
+/*
+ * write to an AFS file
+ */
+ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct dentry *dentry = iocb->ki_filp->f_path.dentry;
+ struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ ssize_t result;
+ size_t count = iov_length(iov, nr_segs);
+ int ret;
+
+ _enter("{%x.%u},{%zu},%lu,",
+ vnode->fid.vid, vnode->fid.vnode, count, nr_segs);
+
+ if (IS_SWAPFILE(&vnode->vfs_inode)) {
+ printk(KERN_INFO
+ "AFS: Attempt to write to active swap file!\n");
+ return -EBUSY;
+ }
+
+ if (!count)
+ return 0;
+
+ result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ if (IS_ERR_VALUE(result)) {
+ _leave(" = %zd", result);
+ return result;
+ }
+
+ /* return error values for O_SYNC and IS_SYNC() */
+ if (IS_SYNC(&vnode->vfs_inode) || iocb->ki_filp->f_flags & O_SYNC) {
+ ret = afs_fsync(iocb->ki_filp, dentry, 1);
+ if (ret < 0)
+ result = ret;
+ }
+
+ _leave(" = %zd", result);
+ return result;
+}
+
+/*
+ * flush the vnode to the fileserver
+ */
+int afs_writeback_all(struct afs_vnode *vnode)
+{
+ struct address_space *mapping = vnode->vfs_inode.i_mapping;
+ struct writeback_control wbc = {
+ .bdi = mapping->backing_dev_info,
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .for_writepages = 1,
+ .range_cyclic = 1,
+ };
+ int ret;
+
+ _enter("");
+
+ ret = mapping->a_ops->writepages(mapping, &wbc);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * flush any dirty pages for this process, and check for write errors.
+ * - the return status from this call provides a reliable indication of
+ * whether any write errors occurred for this process.
+ */
+int afs_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ struct afs_writeback *wb, *xwb;
+ struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
+ int ret;
+
+ _enter("{%x:%u},{n=%s},%d",
+ vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
+ datasync);
+
+ /* use a writeback record as a marker in the queue - when this reaches
+ * the front of the queue, all the outstanding writes are either
+ * completed or rejected */
+ wb = kzalloc(sizeof(*wb), GFP_KERNEL);
+ if (!wb)
+ return -ENOMEM;
+ wb->vnode = vnode;
+ wb->first = 0;
+ wb->last = -1;
+ wb->offset_first = 0;
+ wb->to_last = PAGE_SIZE;
+ wb->usage = 1;
+ wb->state = AFS_WBACK_SYNCING;
+ init_waitqueue_head(&wb->waitq);
+
+ spin_lock(&vnode->writeback_lock);
+ list_for_each_entry(xwb, &vnode->writebacks, link) {
+ if (xwb->state == AFS_WBACK_PENDING)
+ xwb->state = AFS_WBACK_CONFLICTING;
+ }
+ list_add_tail(&wb->link, &vnode->writebacks);
+ spin_unlock(&vnode->writeback_lock);
+
+ /* push all the outstanding writebacks to the server */
+ ret = afs_writeback_all(vnode);
+ if (ret < 0) {
+ afs_put_writeback(wb);
+ _leave(" = %d [wb]", ret);
+ return ret;
+ }
+
+ /* wait for the preceding writes to actually complete */
+ ret = wait_event_interruptible(wb->waitq,
+ wb->state == AFS_WBACK_COMPLETE ||
+ vnode->writebacks.next == &wb->link);
+ afs_put_writeback(wb);
+ _leave(" = %d", ret);
+ return ret;
+}
diff --git a/fs/aio.c b/fs/aio.c
index b97ab8028b6..ac1c1587aa0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -346,10 +346,9 @@ void fastcall exit_aio(struct mm_struct *mm)
wait_for_all_aios(ctx);
/*
- * this is an overkill, but ensures we don't leave
- * the ctx on the aio_wq
+ * Ensure we don't leave the ctx on the aio_wq
*/
- flush_workqueue(aio_wq);
+ cancel_work_sync(&ctx->wq.work);
if (1 != atomic_read(&ctx->users))
printk(KERN_DEBUG
@@ -372,7 +371,7 @@ void fastcall __put_ioctx(struct kioctx *ctx)
BUG_ON(ctx->reqs_active);
cancel_delayed_work(&ctx->wq);
- flush_workqueue(aio_wq);
+ cancel_work_sync(&ctx->wq.work);
aio_free_ring(ctx);
mmdrop(ctx->mm);
ctx->mm = NULL;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 18657f001b4..72d0b412c37 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -675,19 +675,8 @@ static ssize_t
bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
{
char *s = enabled ? "enabled" : "disabled";
- int len = strlen(s);
- loff_t pos = *ppos;
- if (pos < 0)
- return -EINVAL;
- if (pos >= len)
- return 0;
- if (len < pos + nbytes)
- nbytes = len - pos;
- if (copy_to_user(buf, s + pos, nbytes))
- return -EFAULT;
- *ppos = pos + nbytes;
- return nbytes;
+ return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
}
static ssize_t bm_status_write(struct file * file, const char __user * buffer,
diff --git a/fs/buffer.c b/fs/buffer.c
index eb820b82a63..aecd057cd0e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1846,13 +1846,8 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
if (block_start >= to)
break;
if (buffer_new(bh)) {
- void *kaddr;
-
clear_buffer_new(bh);
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr+block_start, 0, bh->b_size);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, block_start, bh->b_size, KM_USER0);
set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
}
@@ -1940,10 +1935,8 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
SetPageError(page);
}
if (!buffer_mapped(bh)) {
- void *kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + i * blocksize, 0, blocksize);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, i * blocksize, blocksize,
+ KM_USER0);
if (!err)
set_buffer_uptodate(bh);
continue;
@@ -2086,7 +2079,6 @@ int cont_prepare_write(struct page *page, unsigned offset,
long status;
unsigned zerofrom;
unsigned blocksize = 1 << inode->i_blkbits;
- void *kaddr;
while(page->index > (pgpos = *bytes>>PAGE_CACHE_SHIFT)) {
status = -ENOMEM;
@@ -2108,10 +2100,8 @@ int cont_prepare_write(struct page *page, unsigned offset,
PAGE_CACHE_SIZE, get_block);
if (status)
goto out_unmap;
- kaddr = kmap_atomic(new_page, KM_USER0);
- memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
- flush_dcache_page(new_page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, zerofrom, PAGE_CACHE_SIZE - zerofrom,
+ KM_USER0);
generic_commit_write(NULL, new_page, zerofrom, PAGE_CACHE_SIZE);
unlock_page(new_page);
page_cache_release(new_page);
@@ -2138,10 +2128,7 @@ int cont_prepare_write(struct page *page, unsigned offset,
if (status)
goto out1;
if (zerofrom < offset) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr+zerofrom, 0, offset-zerofrom);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, zerofrom, offset - zerofrom, KM_USER0);
__block_commit_write(inode, page, zerofrom, offset);
}
return 0;
@@ -2340,10 +2327,7 @@ failed:
* Error recovery is pretty slack. Clear the page and mark it dirty
* so we'll later zero out any blocks which _were_ allocated.
*/
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
SetPageUptodate(page);
set_page_dirty(page);
return ret;
@@ -2382,7 +2366,6 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
- void *kaddr;
int ret;
/* Is the page fully inside i_size? */
@@ -2413,10 +2396,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
@@ -2437,7 +2417,6 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
unsigned to;
struct page *page;
const struct address_space_operations *a_ops = mapping->a_ops;
- char *kaddr;
int ret = 0;
if ((offset & (blocksize - 1)) == 0)
@@ -2451,10 +2430,8 @@ int nobh_truncate_page(struct address_space *mapping, loff_t from)
to = (offset + blocksize) & ~(blocksize - 1);
ret = a_ops->prepare_write(NULL, page, offset, to);
if (ret == 0) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
+ KM_USER0);
/*
* It would be more correct to call aops->commit_write()
* here, but this is more efficient.
@@ -2480,7 +2457,6 @@ int block_truncate_page(struct address_space *mapping,
struct inode *inode = mapping->host;
struct page *page;
struct buffer_head *bh;
- void *kaddr;
int err;
blocksize = 1 << inode->i_blkbits;
@@ -2534,11 +2510,7 @@ int block_truncate_page(struct address_space *mapping,
goto unlock;
}
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, length);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
-
+ zero_user_page(page, offset, length, KM_USER0);
mark_buffer_dirty(bh);
err = 0;
@@ -2559,7 +2531,6 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
unsigned offset;
- void *kaddr;
/* Is the page fully inside i_size? */
if (page->index < end_index)
@@ -2585,10 +2556,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, PAGE_CACHE_SIZE - offset, KM_USER0);
return __block_write_full_page(inode, page, get_block, wbc);
}
@@ -2978,7 +2946,7 @@ static void buffer_exit_cpu(int cpu)
static int buffer_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
- if (action == CPU_DEAD)
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
buffer_exit_cpu((unsigned long)hcpu);
return NOTIFY_OK;
}
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index d98be5e0132..3527c7c6def 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -77,36 +77,6 @@ static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buf
return ret;
}
-
-/**
- * flush_read_buffer - push buffer to userspace.
- * @buffer: data buffer for file.
- * @userbuf: user-passed buffer.
- * @count: number of bytes requested.
- * @ppos: file position.
- *
- * Copy the buffer we filled in fill_read_buffer() to userspace.
- * This is done at the reader's leisure, copying and advancing
- * the amount they specify each time.
- * This may be called continuously until the buffer is empty.
- */
-static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf,
- size_t count, loff_t * ppos)
-{
- int error;
-
- if (*ppos > buffer->count)
- return 0;
-
- if (count > (buffer->count - *ppos))
- count = buffer->count - *ppos;
-
- error = copy_to_user(buf,buffer->page + *ppos,count);
- if (!error)
- *ppos += count;
- return error ? -EFAULT : count;
-}
-
/**
* configfs_read_file - read an attribute.
* @file: file pointer.
@@ -139,7 +109,8 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
}
pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
__FUNCTION__, count, *ppos, buffer->page);
- retval = flush_read_buffer(buffer,buf,count,ppos);
+ retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
+ buffer->count);
out:
up(&buffer->sem);
return retval;
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1e88d8d1d2a..8593f3dfd29 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -867,7 +867,6 @@ static int do_direct_IO(struct dio *dio)
do_holes:
/* Handle holes */
if (!buffer_mapped(map_bh)) {
- char *kaddr;
loff_t i_size_aligned;
/* AKPM: eargh, -ENOTBLK is a hack */
@@ -888,11 +887,8 @@ do_holes:
page_cache_release(page);
goto out;
}
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + (block_in_page << blkbits),
- 0, 1 << blkbits);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, block_in_page << blkbits,
+ 1 << blkbits, KM_USER0);
dio->block_in_file++;
block_in_page++;
goto next_block;
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index e1bb0317198..a6cb6171c3a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1767,7 +1767,6 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
struct inode *inode = mapping->host;
struct buffer_head *bh;
int err = 0;
- void *kaddr;
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
@@ -1779,10 +1778,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
*/
if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) &&
ext3_should_writeback_data(inode) && PageUptodate(page)) {
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, length);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, length, KM_USER0);
set_page_dirty(page);
goto unlock;
}
@@ -1835,11 +1831,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, length);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
-
+ zero_user_page(page, offset, length, KM_USER0);
BUFFER_TRACE(bh, "zeroed end of block");
err = 0;
diff --git a/fs/mpage.c b/fs/mpage.c
index fa2441f57b4..0fb914fc2ee 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -284,11 +284,9 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
}
if (first_hole != blocks_per_page) {
- char *kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + (first_hole << blkbits), 0,
- PAGE_CACHE_SIZE - (first_hole << blkbits));
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, first_hole << blkbits,
+ PAGE_CACHE_SIZE - (first_hole << blkbits),
+ KM_USER0);
if (first_hole == 0) {
SetPageUptodate(page);
unlock_page(page);
@@ -576,14 +574,11 @@ page_is_mapped:
* written out to the file."
*/
unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
- char *kaddr;
if (page->index > end_index || !offset)
goto confused;
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, PAGE_CACHE_SIZE - offset,
+ KM_USER0);
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 856b2f5da51..b3780e3fc88 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1152,14 +1152,12 @@ static int fastcall do_path_lookup(int dfd, const char *name,
fput_light(file, fput_needed);
}
- current->total_link_count = 0;
- retval = link_path_walk(name, nd);
+
+ retval = path_walk(name, nd);
out:
- if (likely(retval == 0)) {
- if (unlikely(!audit_dummy_context() && nd && nd->dentry &&
+ if (unlikely(!retval && !audit_dummy_context() && nd->dentry &&
nd->dentry->d_inode))
audit_inode(name, nd->dentry->d_inode);
- }
out_fail:
return retval;
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index ce341dc76d5..9b118ee2019 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -11,4 +11,3 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-objs := $(nfsd-y)
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 6f24768272a..79bd03b8bbf 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -469,6 +469,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
nd.dentry = NULL;
exp.ex_path = NULL;
+ /* fs locations */
+ exp.ex_fslocs.locations = NULL;
+ exp.ex_fslocs.locations_count = 0;
+ exp.ex_fslocs.migrated = 0;
+
+ exp.ex_uuid = NULL;
+
if (mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
@@ -509,13 +516,6 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (exp.h.expiry_time == 0)
goto out;
- /* fs locations */
- exp.ex_fslocs.locations = NULL;
- exp.ex_fslocs.locations_count = 0;
- exp.ex_fslocs.migrated = 0;
-
- exp.ex_uuid = NULL;
-
/* flags */
err = get_int(&mesg, &an_int);
if (err == -ENOENT)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7f5bad0393b..eac82830bfd 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -177,7 +177,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
if (max_blocksize < resp->count)
resp->count = max_blocksize;
- svc_reserve(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
+ svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
nfserr = nfsd_read(rqstp, &resp->fh, NULL,
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 7e4bb0af24d..10f6e7dcf63 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -239,7 +239,7 @@ static __be32 *
encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
{
struct dentry *dentry = fhp->fh_dentry;
- if (dentry && dentry->d_inode != NULL) {
+ if (dentry && dentry->d_inode) {
int err;
struct kstat stat;
@@ -300,9 +300,9 @@ int
nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_sattrargs *args)
{
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_sattr3(p, &args->attrs)))
+ if (!(p = decode_fh(p, &args->fh)))
return 0;
+ p = decode_sattr3(p, &args->attrs);
if ((args->check_guard = ntohl(*p++)) != 0) {
struct timespec time;
@@ -343,9 +343,9 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
int v,pn;
u32 max_blocksize = svc_max_payload(rqstp);
- if (!(p = decode_fh(p, &args->fh))
- || !(p = xdr_decode_hyper(p, &args->offset)))
+ if (!(p = decode_fh(p, &args->fh)))
return 0;
+ p = xdr_decode_hyper(p, &args->offset);
len = args->count = ntohl(*p++);
@@ -369,28 +369,44 @@ int
nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_writeargs *args)
{
- unsigned int len, v, hdr;
+ unsigned int len, v, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
- if (!(p = decode_fh(p, &args->fh))
- || !(p = xdr_decode_hyper(p, &args->offset)))
+ if (!(p = decode_fh(p, &args->fh)))
return 0;
+ p = xdr_decode_hyper(p, &args->offset);
args->count = ntohl(*p++);
args->stable = ntohl(*p++);
len = args->len = ntohl(*p++);
+ /*
+ * The count must equal the amount of data passed.
+ */
+ if (args->count != args->len)
+ return 0;
+ /*
+ * Check to make sure that we got the right number of
+ * bytes.
+ */
hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
- if (rqstp->rq_arg.len < hdr ||
- rqstp->rq_arg.len - hdr < len)
+ dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
+ - hdr;
+ /*
+ * Round the length of the data which was specified up to
+ * the next multiple of XDR units and then compare that
+ * against the length which was actually received.
+ */
+ if (dlen != XDR_QUADLEN(len)*4)
return 0;
+ if (args->count > max_blocksize) {
+ args->count = max_blocksize;
+ len = args->len = max_blocksize;
+ }
rqstp->rq_vec[0].iov_base = (void*)p;
rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
-
- if (len > max_blocksize)
- len = max_blocksize;
- v= 0;
+ v = 0;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
v++;
@@ -398,9 +414,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
rqstp->rq_vec[v].iov_len = len;
- args->vlen = v+1;
-
- return args->count == args->len && rqstp->rq_vec[0].iov_len > 0;
+ args->vlen = v + 1;
+ return 1;
}
int
@@ -414,8 +429,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
switch (args->createmode = ntohl(*p++)) {
case NFS3_CREATE_UNCHECKED:
case NFS3_CREATE_GUARDED:
- if (!(p = decode_sattr3(p, &args->attrs)))
- return 0;
+ p = decode_sattr3(p, &args->attrs);
break;
case NFS3_CREATE_EXCLUSIVE:
args->verf = p;
@@ -431,10 +445,10 @@ int
nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_createargs *args)
{
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len))
- || !(p = decode_sattr3(p, &args->attrs)))
+ if (!(p = decode_fh(p, &args->fh)) ||
+ !(p = decode_filename(p, &args->name, &args->len)))
return 0;
+ p = decode_sattr3(p, &args->attrs);
return xdr_argsize_check(rqstp, p);
}
@@ -448,11 +462,12 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
char *old, *new;
struct kvec *vec;
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen))
- || !(p = decode_sattr3(p, &args->attrs))
+ if (!(p = decode_fh(p, &args->ffh)) ||
+ !(p = decode_filename(p, &args->fname, &args->flen))
)
return 0;
+ p = decode_sattr3(p, &args->attrs);
+
/* now decode the pathname, which might be larger than the first page.
* As we have to check for nul's anyway, we copy it into a new page
* This page appears in the rq_res.pages list, but as pages_len is always
@@ -502,10 +517,8 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p,
args->ftype = ntohl(*p++);
if (args->ftype == NF3BLK || args->ftype == NF3CHR
- || args->ftype == NF3SOCK || args->ftype == NF3FIFO) {
- if (!(p = decode_sattr3(p, &args->attrs)))
- return 0;
- }
+ || args->ftype == NF3SOCK || args->ftype == NF3FIFO)
+ p = decode_sattr3(p, &args->attrs);
if (args->ftype == NF3BLK || args->ftype == NF3CHR) {
args->major = ntohl(*p++);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 673a53c014a..cc3b7badd48 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -137,7 +137,6 @@ struct ace_container {
static short ace2type(struct nfs4_ace *);
static void _posix_to_nfsv4_one(struct posix_acl *, struct nfs4_acl *,
unsigned int);
-void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
struct nfs4_acl *
nfs4_acl_posix_to_nfsv4(struct posix_acl *pacl, struct posix_acl *dpacl,
@@ -785,21 +784,6 @@ nfs4_acl_new(int n)
return acl;
}
-void
-nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
- int whotype, uid_t who)
-{
- struct nfs4_ace *ace = acl->aces + acl->naces;
-
- ace->type = type;
- ace->flag = flag;
- ace->access_mask = access_mask;
- ace->whotype = whotype;
- ace->who = who;
-
- acl->naces++;
-}
-
static struct {
char *string;
int stringlen;
@@ -851,6 +835,5 @@ nfs4_acl_write_who(int who, char *p)
}
EXPORT_SYMBOL(nfs4_acl_new);
-EXPORT_SYMBOL(nfs4_acl_add_ace);
EXPORT_SYMBOL(nfs4_acl_get_whotype);
EXPORT_SYMBOL(nfs4_acl_write_who);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 678f3be88ac..3cc8ce422ab 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1326,8 +1326,6 @@ do_recall(void *__dp)
{
struct nfs4_delegation *dp = __dp;
- daemonize("nfsv4-recall");
-
nfsd4_cb_recall(dp);
return 0;
}
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 739dd3c5c3b..6ca2d24fc21 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -323,7 +323,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*
*/
- u8 version = 1;
+ u8 version;
u8 fsid_type = 0;
struct inode * inode = dentry->d_inode;
struct dentry *parent = dentry->d_parent;
@@ -341,15 +341,59 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
* the reference filehandle (if it is in the same export)
* or the export options.
*/
+ retry:
+ version = 1;
if (ref_fh && ref_fh->fh_export == exp) {
version = ref_fh->fh_handle.fh_version;
- if (version == 0xca)
+ fsid_type = ref_fh->fh_handle.fh_fsid_type;
+
+ if (ref_fh == fhp)
+ fh_put(ref_fh);
+ ref_fh = NULL;
+
+ switch (version) {
+ case 0xca:
fsid_type = FSID_DEV;
- else
- fsid_type = ref_fh->fh_handle.fh_fsid_type;
- /* We know this version/type works for this export
- * so there is no need for further checks.
+ break;
+ case 1:
+ break;
+ default:
+ goto retry;
+ }
+
+ /* Need to check that this type works for this
+ * export point. As the fsid -> filesystem mapping
+ * was guided by user-space, there is no guarantee
+ * that the filesystem actually supports that fsid
+ * type. If it doesn't we loop around again without
+ * ref_fh set.
*/
+ switch(fsid_type) {
+ case FSID_DEV:
+ if (!old_valid_dev(ex_dev))
+ goto retry;
+ /* FALL THROUGH */
+ case FSID_MAJOR_MINOR:
+ case FSID_ENCODE_DEV:
+ if (!(exp->ex_dentry->d_inode->i_sb->s_type->fs_flags
+ & FS_REQUIRES_DEV))
+ goto retry;
+ break;
+ case FSID_NUM:
+ if (! (exp->ex_flags & NFSEXP_FSID))
+ goto retry;
+ break;
+ case FSID_UUID8:
+ case FSID_UUID16:
+ if (!root_export)
+ goto retry;
+ /* fall through */
+ case FSID_UUID4_INUM:
+ case FSID_UUID16_INUM:
+ if (exp->ex_uuid == NULL)
+ goto retry;
+ break;
+ }
} else if (exp->ex_uuid) {
if (fhp->fh_maxsize >= 64) {
if (root_export)
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 5cc2eec981b..b2c7147aa92 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -155,7 +155,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
argp->count);
argp->count = NFSSVC_MAXBLKSIZE_V2;
}
- svc_reserve(rqstp, (19<<2) + argp->count + 4);
+ svc_reserve_auth(rqstp, (19<<2) + argp->count + 4);
resp->count = argp->count;
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), NULL,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 0c24b9e24fe..cb3e7fadb77 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -231,9 +231,10 @@ int
nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_sattrargs *args)
{
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_sattr(p, &args->attrs)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
+ p = decode_sattr(p, &args->attrs);
return xdr_argsize_check(rqstp, p);
}
@@ -284,8 +285,9 @@ int
nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_writeargs *args)
{
- unsigned int len;
+ unsigned int len, hdr, dlen;
int v;
+
if (!(p = decode_fh(p, &args->fh)))
return 0;
@@ -293,11 +295,30 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
args->offset = ntohl(*p++); /* offset */
p++; /* totalcount */
len = args->len = ntohl(*p++);
- rqstp->rq_vec[0].iov_base = (void*)p;
- rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len -
- (((void*)p) - rqstp->rq_arg.head[0].iov_base);
+ /*
+ * The protocol specifies a maximum of 8192 bytes.
+ */
if (len > NFSSVC_MAXBLKSIZE_V2)
- len = NFSSVC_MAXBLKSIZE_V2;
+ return 0;
+
+ /*
+ * Check to make sure that we got the right number of
+ * bytes.
+ */
+ hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
+ dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
+ - hdr;
+
+ /*
+ * Round the length of the data which was specified up to
+ * the next multiple of XDR units and then compare that
+ * against the length which was actually received.
+ */
+ if (dlen != XDR_QUADLEN(len)*4)
+ return 0;
+
+ rqstp->rq_vec[0].iov_base = (void*)p;
+ rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
v = 0;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
@@ -306,18 +327,18 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
rqstp->rq_vec[v].iov_len = PAGE_SIZE;
}
rqstp->rq_vec[v].iov_len = len;
- args->vlen = v+1;
- return rqstp->rq_vec[0].iov_len > 0;
+ args->vlen = v + 1;
+ return 1;
}
int
nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_createargs *args)
{
- if (!(p = decode_fh(p, &args->fh))
- || !(p = decode_filename(p, &args->name, &args->len))
- || !(p = decode_sattr(p, &args->attrs)))
+ if ( !(p = decode_fh(p, &args->fh))
+ || !(p = decode_filename(p, &args->name, &args->len)))
return 0;
+ p = decode_sattr(p, &args->attrs);
return xdr_argsize_check(rqstp, p);
}
@@ -361,11 +382,11 @@ int
nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_symlinkargs *args)
{
- if (!(p = decode_fh(p, &args->ffh))
- || !(p = decode_filename(p, &args->fname, &args->flen))
- || !(p = decode_pathname(p, &args->tname, &args->tlen))
- || !(p = decode_sattr(p, &args->attrs)))
+ if ( !(p = decode_fh(p, &args->ffh))
+ || !(p = decode_filename(p, &args->fname, &args->flen))
+ || !(p = decode_pathname(p, &args->tname, &args->tlen)))
return 0;
+ p = decode_sattr(p, &args->attrs);
return xdr_argsize_check(rqstp, p);
}
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index ab45db529c8..9e451a68580 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1059,20 +1059,12 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
maping blocks, since there is none, so we just zero out remaining
parts of first and last pages in write area (if needed) */
if ((pos & ~((loff_t) PAGE_CACHE_SIZE - 1)) > inode->i_size) {
- if (from != 0) { /* First page needs to be partially zeroed */
- char *kaddr = kmap_atomic(prepared_pages[0], KM_USER0);
- memset(kaddr, 0, from);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(prepared_pages[0]);
- }
- if (to != PAGE_CACHE_SIZE) { /* Last page needs to be partially zeroed */
- char *kaddr =
- kmap_atomic(prepared_pages[num_pages - 1],
- KM_USER0);
- memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(prepared_pages[num_pages - 1]);
- }
+ if (from != 0) /* First page needs to be partially zeroed */
+ zero_user_page(prepared_pages[0], 0, from, KM_USER0);
+
+ if (to != PAGE_CACHE_SIZE) /* Last page needs to be partially zeroed */
+ zero_user_page(prepared_pages[num_pages-1], to,
+ PAGE_CACHE_SIZE - to, KM_USER0);
/* Since all blocks are new - use already calculated value */
return blocks;
@@ -1199,13 +1191,9 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
ll_rw_block(READ, 1, &bh);
*wait_bh++ = bh;
} else { /* Not mapped, zero it */
- char *kaddr =
- kmap_atomic(prepared_pages[0],
- KM_USER0);
- memset(kaddr + block_start, 0,
- from - block_start);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(prepared_pages[0]);
+ zero_user_page(prepared_pages[0],
+ block_start,
+ from - block_start, KM_USER0);
set_buffer_uptodate(bh);
}
}
@@ -1237,13 +1225,8 @@ static int reiserfs_prepare_file_region_for_write(struct inode *inode
ll_rw_block(READ, 1, &bh);
*wait_bh++ = bh;
} else { /* Not mapped, zero it */
- char *kaddr =
- kmap_atomic(prepared_pages
- [num_pages - 1],
- KM_USER0);
- memset(kaddr + to, 0, block_end - to);
- kunmap_atomic(kaddr, KM_USER0);
- flush_dcache_page(prepared_pages[num_pages - 1]);
+ zero_user_page(prepared_pages[num_pages-1],
+ to, block_end - to, KM_USER0);
set_buffer_uptodate(bh);
}
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 9fcbfe31697..1272d11399f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2148,13 +2148,8 @@ int reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps)
length = offset & (blocksize - 1);
/* if we are not on a block boundary */
if (length) {
- char *kaddr;
-
length = blocksize - length;
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + offset, 0, length);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, length, KM_USER0);
if (buffer_mapped(bh) && bh->b_blocknr != 0) {
mark_buffer_dirty(bh);
}
@@ -2370,7 +2365,6 @@ static int reiserfs_write_full_page(struct page *page,
** last byte in the file
*/
if (page->index >= end_index) {
- char *kaddr;
unsigned last_offset;
last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
@@ -2379,10 +2373,7 @@ static int reiserfs_write_full_page(struct page *page,
unlock_page(page);
return 0;
}
- kaddr = kmap_atomic(page, KM_USER0);
- memset(kaddr + last_offset, 0, PAGE_CACHE_SIZE - last_offset);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, last_offset, PAGE_CACHE_SIZE - last_offset, KM_USER0);
}
bh = head;
block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 0e637adc2b8..b502c7197ec 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -111,36 +111,6 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
return ret;
}
-
-/**
- * flush_read_buffer - push buffer to userspace.
- * @buffer: data buffer for file.
- * @buf: user-passed buffer.
- * @count: number of bytes requested.
- * @ppos: file position.
- *
- * Copy the buffer we filled in fill_read_buffer() to userspace.
- * This is done at the reader's leisure, copying and advancing
- * the amount they specify each time.
- * This may be called continuously until the buffer is empty.
- */
-static int flush_read_buffer(struct sysfs_buffer * buffer, char __user * buf,
- size_t count, loff_t * ppos)
-{
- int error;
-
- if (*ppos > buffer->count)
- return 0;
-
- if (count > (buffer->count - *ppos))
- count = buffer->count - *ppos;
-
- error = copy_to_user(buf,buffer->page + *ppos,count);
- if (!error)
- *ppos += count;
- return error ? -EFAULT : count;
-}
-
/**
* sysfs_read_file - read an attribute.
* @file: file pointer.
@@ -177,7 +147,8 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
}
pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
__FUNCTION__, count, *ppos, buffer->page);
- retval = flush_read_buffer(buffer,buf,count,ppos);
+ retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
+ buffer->count);
out:
up(&buffer->sem);
return retval;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f5aa3ef855f..a96bde6df96 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1734,11 +1734,13 @@ xfs_icsb_cpu_notify(
per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
/* Easy Case - initialize the area and locks, and
* then rebalance when online does everything else for us. */
memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
break;
case CPU_ONLINE:
+ case CPU_ONLINE_FROZEN:
xfs_icsb_lock(mp);
xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
@@ -1746,6 +1748,7 @@ xfs_icsb_cpu_notify(
xfs_icsb_unlock(mp);
break;
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
/* Disable all the counters, then fold the dead cpu's
* count into the total on the global superblock and
* re-enable the counters. */
diff --git a/include/asm-alpha/smp.h b/include/asm-alpha/smp.h
index a1a1eca6be4..286e1d844f6 100644
--- a/include/asm-alpha/smp.h
+++ b/include/asm-alpha/smp.h
@@ -51,6 +51,7 @@ int smp_call_function_on_cpu(void (*func) (void *info), void *info,int retry, in
#else /* CONFIG_SMP */
+#define hard_smp_processor_id() 0
#define smp_call_function_on_cpu(func,info,retry,wait,cpu) ({ 0; })
#endif /* CONFIG_SMP */
diff --git a/include/asm-alpha/thread_info.h b/include/asm-alpha/thread_info.h
index eeb3bef91e1..f4defc2bd3f 100644
--- a/include/asm-alpha/thread_info.h
+++ b/include/asm-alpha/thread_info.h
@@ -97,7 +97,7 @@ register struct thread_info *__current_thread_info __asm__("$8");
1 << TIF_UAC_SIGBUS)
#define SET_UNALIGN_CTL(task,value) ({ \
- (task)->thread_info->flags = (((task)->thread_info->flags & \
+ task_thread_info(task)->flags = ((task_thread_info(task)->flags & \
~ALPHA_UAC_MASK) \
| (((value) << ALPHA_UAC_SHIFT) & (1<<TIF_UAC_NOPRINT))\
| (((value) << (ALPHA_UAC_SHIFT + 1)) & (1<<TIF_UAC_SIGBUS)) \
@@ -105,11 +105,11 @@ register struct thread_info *__current_thread_info __asm__("$8");
0; })
#define GET_UNALIGN_CTL(task,value) ({ \
- put_user(((task)->thread_info->flags & (1 << TIF_UAC_NOPRINT)) \
+ put_user((task_thread_info(task)->flags & (1 << TIF_UAC_NOPRINT))\
>> ALPHA_UAC_SHIFT \
- | ((task)->thread_info->flags & (1 << TIF_UAC_SIGBUS)) \
+ | (task_thread_info(task)->flags & (1 << TIF_UAC_SIGBUS))\
>> (ALPHA_UAC_SHIFT + 1) \
- | ((task)->thread_info->flags & (1 << TIF_UAC_NOFIX)) \
+ | (task_thread_info(task)->flags & (1 << TIF_UAC_NOFIX))\
>> (ALPHA_UAC_SHIFT - 1), \
(int __user *)(value)); \
})
diff --git a/include/asm-arm/arch-at91/cpu.h b/include/asm-arm/arch-at91/cpu.h
index d464ca58cdb..7ef4eebe9f8 100644
--- a/include/asm-arm/arch-at91/cpu.h
+++ b/include/asm-arm/arch-at91/cpu.h
@@ -68,4 +68,10 @@ static inline unsigned long at91_arch_identify(void)
#define cpu_is_at91sam9263() (0)
#endif
+/*
+ * Since this is ARM, we will never run on any AVR32 CPU. But these
+ * definitions may reduce clutter in common drivers.
+ */
+#define cpu_is_at32ap7000() (0)
+
#endif
diff --git a/include/asm-avr32/arch-at32ap/cpu.h b/include/asm-avr32/arch-at32ap/cpu.h
new file mode 100644
index 00000000000..2bdc5bd6f79
--- /dev/null
+++ b/include/asm-avr32/arch-at32ap/cpu.h
@@ -0,0 +1,33 @@
+/*
+ * AVR32 and (fake) AT91 CPU identification
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_ARCH_CPU_H
+#define __ASM_ARCH_CPU_H
+
+/*
+ * Only AT32AP7000 is defined for now. We can identify the specific
+ * chip at runtime, but I'm not sure if it's really worth it.
+ */
+#ifdef CONFIG_CPU_AT32AP7000
+# define cpu_is_at32ap7000() (1)
+#else
+# define cpu_is_at32ap7000() (0)
+#endif
+
+/*
+ * Since this is AVR32, we will never run on any AT91 CPU. But these
+ * definitions may reduce clutter in common drivers.
+ */
+#define cpu_is_at91rm9200() (0)
+#define cpu_is_at91sam9xe() (0)
+#define cpu_is_at91sam9260() (0)
+#define cpu_is_at91sam9261() (0)
+#define cpu_is_at91sam9263() (0)
+
+#endif /* __ASM_ARCH_CPU_H */
diff --git a/include/asm-avr32/setup.h b/include/asm-avr32/setup.h
index 1ff1a217015..b0828d43e11 100644
--- a/include/asm-avr32/setup.h
+++ b/include/asm-avr32/setup.h
@@ -110,7 +110,7 @@ struct tagtable {
int (*parse)(struct tag *);
};
-#define __tag __attribute_used__ __attribute__((__section__(".taglist")))
+#define __tag __attribute_used__ __attribute__((__section__(".taglist.init")))
#define __tagtable(tag, fn) \
static struct tagtable __tagtable_##fn __tag = { tag, fn }
diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h
index 8f512047181..2418cce624c 100644
--- a/include/asm-avr32/unistd.h
+++ b/include/asm-avr32/unistd.h
@@ -295,8 +295,10 @@
#define __NR_shmdt 276
#define __NR_shmctl 277
+#define __NR_utimensat 278
+
#ifdef __KERNEL__
-#define NR_syscalls 278
+#define NR_syscalls 279
#define __ARCH_WANT_IPC_PARSE_VERSION
diff --git a/include/asm-blackfin/processor.h b/include/asm-blackfin/processor.h
index 997465c93e8..0336ff132c1 100644
--- a/include/asm-blackfin/processor.h
+++ b/include/asm-blackfin/processor.h
@@ -58,10 +58,10 @@ do { \
(_regs)->pc = (_pc); \
if (current->mm) \
(_regs)->p5 = current->mm->start_data; \
- current->thread_info->l1_task_info.stack_start \
+ task_thread_info(current)->l1_task_info.stack_start \
= (void *)current->mm->context.stack_start; \
- current->thread_info->l1_task_info.lowest_sp = (void *)(_usp); \
- memcpy(L1_SCRATCH_TASK_INFO, &current->thread_info->l1_task_info, \
+ task_thread_info(current)->l1_task_info.lowest_sp = (void *)(_usp); \
+ memcpy(L1_SCRATCH_TASK_INFO, &task_thread_info(current)->l1_task_info, \
sizeof(*L1_SCRATCH_TASK_INFO)); \
wrusp(_usp); \
} while(0)
diff --git a/include/asm-blackfin/system.h b/include/asm-blackfin/system.h
index b5bf6e7cb5e..5e5f1a0566c 100644
--- a/include/asm-blackfin/system.h
+++ b/include/asm-blackfin/system.h
@@ -239,9 +239,9 @@ asmlinkage struct task_struct *resume(struct task_struct *prev, struct task_stru
#define switch_to(prev,next,last) \
do { \
- memcpy (&prev->thread_info->l1_task_info, L1_SCRATCH_TASK_INFO, \
+ memcpy (&task_thread_info(prev)->l1_task_info, L1_SCRATCH_TASK_INFO, \
sizeof *L1_SCRATCH_TASK_INFO); \
- memcpy (L1_SCRATCH_TASK_INFO, &next->thread_info->l1_task_info, \
+ memcpy (L1_SCRATCH_TASK_INFO, &task_thread_info(next)->l1_task_info, \
sizeof *L1_SCRATCH_TASK_INFO); \
(last) = resume (prev, next); \
} while (0)
diff --git a/include/asm-frv/tlb.h b/include/asm-frv/tlb.h
index f94fe5cb9b3..cd458eb6d75 100644
--- a/include/asm-frv/tlb.h
+++ b/include/asm-frv/tlb.h
@@ -3,7 +3,11 @@
#include <asm/tlbflush.h>
+#ifdef CONFIG_MMU
+extern void check_pgt_cache(void);
+#else
#define check_pgt_cache() do {} while(0)
+#endif
/*
* we don't need any special per-pte or per-vma handling...
diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 3503ad66945..118e9812778 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -122,21 +122,21 @@ static inline int pfn_valid(int pfn)
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0)
#define alloc_bootmem_node(pgdat, x) \
({ \
- struct pglist_data __attribute__ ((unused)) \
+ struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \
__pa(MAX_DMA_ADDRESS)); \
})
#define alloc_bootmem_pages_node(pgdat, x) \
({ \
- struct pglist_data __attribute__ ((unused)) \
+ struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \
__pa(MAX_DMA_ADDRESS)) \
})
#define alloc_bootmem_low_pages_node(pgdat, x) \
({ \
- struct pglist_data __attribute__ ((unused)) \
+ struct pglist_data __maybe_unused \
*__alloc_bootmem_node__pgdat = (pgdat); \
__alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \
})
diff --git a/include/asm-i386/msr.h b/include/asm-i386/msr.h
index 26861df52cc..df21ea04936 100644
--- a/include/asm-i386/msr.h
+++ b/include/asm-i386/msr.h
@@ -86,62 +86,50 @@ static inline unsigned long long native_read_pmc(void)
#define rdmsr(msr,val1,val2) \
do { \
- unsigned long long __val = native_read_msr(msr); \
- val1 = __val; \
- val2 = __val >> 32; \
+ u64 __val = native_read_msr(msr); \
+ (val1) = (u32)__val; \
+ (val2) = (u32)(__val >> 32); \
} while(0)
-#define wrmsr(msr,val1,val2) \
- native_write_msr(msr, ((unsigned long long)val2 << 32) | val1)
-
-#define rdmsrl(msr,val) \
- do { \
- (val) = native_read_msr(msr); \
- } while(0)
-
-static inline void wrmsrl (unsigned long msr, unsigned long long val)
+static inline void wrmsr(u32 __msr, u32 __low, u32 __high)
{
- unsigned long lo, hi;
- lo = (unsigned long) val;
- hi = val >> 32;
- wrmsr (msr, lo, hi);
+ native_write_msr(__msr, ((u64)__high << 32) | __low);
}
+#define rdmsrl(msr,val) \
+ ((val) = native_read_msr(msr))
+
+#define wrmsrl(msr,val) native_write_msr(msr, val)
+
/* wrmsr with exception handling */
-#define wrmsr_safe(msr,val1,val2) \
- (native_write_msr_safe(msr, ((unsigned long long)val2 << 32) | val1))
+static inline int wrmsr_safe(u32 __msr, u32 __low, u32 __high)
+{
+ return native_write_msr_safe(__msr, ((u64)__high << 32) | __low);
+}
/* rdmsr with exception handling */
#define rdmsr_safe(msr,p1,p2) \
({ \
int __err; \
- unsigned long long __val = native_read_msr_safe(msr, &__err);\
- (*p1) = __val; \
- (*p2) = __val >> 32; \
+ u64 __val = native_read_msr_safe(msr, &__err); \
+ (*p1) = (u32)__val; \
+ (*p2) = (u32)(__val >> 32); \
__err; \
})
-#define rdtsc(low,high) \
- do { \
- u64 _l = native_read_tsc(); \
- (low) = (u32)_l; \
- (high) = _l >> 32; \
- } while(0)
-
#define rdtscl(low) \
- do { \
- (low) = native_read_tsc(); \
- } while(0)
+ ((low) = (u32)native_read_tsc())
-#define rdtscll(val) ((val) = native_read_tsc())
+#define rdtscll(val) \
+ ((val) = native_read_tsc())
#define write_tsc(val1,val2) wrmsr(0x10, val1, val2)
#define rdpmc(counter,low,high) \
do { \
u64 _l = native_read_pmc(); \
- low = (u32)_l; \
- high = _l >> 32; \
+ (low) = (u32)_l; \
+ (high) = (u32)(_l >> 32); \
} while(0)
#endif /* !CONFIG_PARAVIRT */
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index e2e7f98723c..bc5c12c1358 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -560,11 +560,6 @@ static inline u64 paravirt_read_tsc(void)
{
return PVOP_CALL0(u64, read_tsc);
}
-#define rdtsc(low,high) do { \
- u64 _l = paravirt_read_tsc(); \
- low = (u32)_l; \
- high = _l >> 32; \
-} while(0)
#define rdtscl(low) do { \
u64 _l = paravirt_read_tsc(); \
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h
index 090abc1da32..0c713278706 100644
--- a/include/asm-i386/smp.h
+++ b/include/asm-i386/smp.h
@@ -124,20 +124,6 @@ static inline int num_booting_cpus(void)
return cpus_weight(cpu_callout_map);
}
-#ifdef CONFIG_X86_LOCAL_APIC
-
-#ifdef APIC_DEFINITION
-extern int hard_smp_processor_id(void);
-#else
-#include <mach_apicdef.h>
-static inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
-}
-#endif
-#endif
-
extern int safe_smp_processor_id(void);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
@@ -152,10 +138,31 @@ extern unsigned int num_processors;
#define NO_PROC_ID 0xFF /* No processor magic marker */
-#endif
+#endif /* CONFIG_SMP */
#ifndef __ASSEMBLY__
+#ifdef CONFIG_X86_LOCAL_APIC
+
+#ifdef APIC_DEFINITION
+extern int hard_smp_processor_id(void);
+#else
+#include <mach_apicdef.h>
+static inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
+}
+#endif /* APIC_DEFINITION */
+
+#else /* CONFIG_X86_LOCAL_APIC */
+
+#ifndef CONFIG_SMP
+#define hard_smp_processor_id() 0
+#endif
+
+#endif /* CONFIG_X86_LOCAL_APIC */
+
extern u8 apicid_2_node[];
#ifdef CONFIG_X86_LOCAL_APIC
diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h
index bf01d4b342b..4cb0f91ae64 100644
--- a/include/asm-i386/thread_info.h
+++ b/include/asm-i386/thread_info.h
@@ -172,7 +172,7 @@ static inline struct thread_info *current_thread_info(void)
#define TS_USEDFPU 0x0001 /* FPU was used by this task this quantum (SMP) */
#define TS_POLLING 0x0002 /* True if in idle loop and not sleeping */
-#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
#endif /* __KERNEL__ */
diff --git a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
index 60fd4ae014f..c60024989eb 100644
--- a/include/asm-ia64/smp.h
+++ b/include/asm-ia64/smp.h
@@ -38,6 +38,8 @@ ia64_get_lid (void)
return lid.f.id << 8 | lid.f.eid;
}
+#define hard_smp_processor_id() ia64_get_lid()
+
#ifdef CONFIG_SMP
#define XTP_OFFSET 0x1e0008
@@ -110,8 +112,6 @@ max_xtp (void)
writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */
}
-#define hard_smp_processor_id() ia64_get_lid()
-
/* Upping and downing of CPUs */
extern int __cpu_disable (void);
extern void __cpu_die (unsigned int cpu);
@@ -128,7 +128,7 @@ extern void unlock_ipi_calllock(void);
extern void identify_siblings (struct cpuinfo_ia64 *);
extern int is_multithreading_enabled(void);
-#else
+#else /* CONFIG_SMP */
#define cpu_logical_id(i) 0
#define cpu_physical_id(i) ia64_get_lid()
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 91698599f91..d2814750658 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -110,6 +110,6 @@ struct thread_info {
#define TS_POLLING 1 /* true if in idle loop and not sleeping */
-#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
#endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/include/asm-m32r/smp.h b/include/asm-m32r/smp.h
index abd937ac523..078e1a51a04 100644
--- a/include/asm-m32r/smp.h
+++ b/include/asm-m32r/smp.h
@@ -108,6 +108,10 @@ extern unsigned long send_IPI_mask_phys(cpumask_t, int, int);
#define IPI_SHIFT (0)
#define NR_IPIS (8)
-#endif /* CONFIG_SMP */
+#else /* CONFIG_SMP */
+
+#define hard_smp_processor_id() 0
+
+#endif /* CONFIG_SMP */
#endif /* _ASM_M32R_SMP_H */
diff --git a/include/asm-m68k/thread_info.h b/include/asm-m68k/thread_info.h
index c4d622a57df..d635a375248 100644
--- a/include/asm-m68k/thread_info.h
+++ b/include/asm-m68k/thread_info.h
@@ -37,17 +37,17 @@ struct thread_info {
#define init_stack (init_thread_union.stack)
#define task_thread_info(tsk) (&(tsk)->thread.info)
-#define task_stack_page(tsk) ((void *)(tsk)->thread_info)
+#define task_stack_page(tsk) ((tsk)->stack)
#define current_thread_info() task_thread_info(current)
#define __HAVE_THREAD_FUNCTIONS
#define setup_thread_stack(p, org) ({ \
- *(struct task_struct **)(p)->thread_info = (p); \
+ *(struct task_struct **)(p)->stack = (p); \
task_thread_info(p)->task = (p); \
})
-#define end_of_stack(p) ((unsigned long *)(p)->thread_info + 1)
+#define end_of_stack(p) ((unsigned long *)(p)->stack + 1)
/* entry.S relies on these definitions!
* bits 0-7 are tested at every exception exit
diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h
index 30f23a2b46c..3713d256d36 100644
--- a/include/asm-mips/system.h
+++ b/include/asm-mips/system.h
@@ -55,7 +55,7 @@ do { \
if (cpu_has_dsp) \
__save_dsp(prev); \
next->thread.emulated_fp = 0; \
- (last) = resume(prev, next, next->thread_info); \
+ (last) = resume(prev, next, task_thread_info(next)); \
if (cpu_has_dsp) \
__restore_dsp(current); \
} while(0)
diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h
index fe857902353..11f4222597a 100644
--- a/include/asm-parisc/compat.h
+++ b/include/asm-parisc/compat.h
@@ -152,7 +152,7 @@ static __inline__ void __user *compat_alloc_user_space(long len)
static inline int __is_compat_task(struct task_struct *t)
{
- return test_ti_thread_flag(t->thread_info, TIF_32BIT);
+ return test_ti_thread_flag(task_thread_info(t), TIF_32BIT);
}
static inline int is_compat_task(void)
diff --git a/include/asm-powerpc/smp.h b/include/asm-powerpc/smp.h
index 01717f266dc..d037f50580e 100644
--- a/include/asm-powerpc/smp.h
+++ b/include/asm-powerpc/smp.h
@@ -83,6 +83,7 @@ extern void __cpu_die(unsigned int cpu);
#else
/* for UP */
+#define hard_smp_processor_id() 0
#define smp_setup_cpu_maps()
#endif /* CONFIG_SMP */
diff --git a/include/asm-s390/smp.h b/include/asm-s390/smp.h
index 0a28e6d6ef4..76e424f718c 100644
--- a/include/asm-s390/smp.h
+++ b/include/asm-s390/smp.h
@@ -110,6 +110,7 @@ static inline void smp_send_stop(void)
__load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK);
}
+#define hard_smp_processor_id() 0
#define smp_cpu_not_running(cpu) 1
#define smp_setup_cpu_possible_map() do { } while (0)
#endif
diff --git a/include/asm-sh/cpu-sh3/dma.h b/include/asm-sh/cpu-sh3/dma.h
index 954801b4602..3a66dc45802 100644
--- a/include/asm-sh/cpu-sh3/dma.h
+++ b/include/asm-sh/cpu-sh3/dma.h
@@ -26,7 +26,7 @@ enum {
XMIT_SZ_128BIT,
};
-static unsigned int ts_shift[] __attribute__ ((used)) = {
+static unsigned int ts_shift[] __maybe_unused = {
[XMIT_SZ_8BIT] = 0,
[XMIT_SZ_16BIT] = 1,
[XMIT_SZ_32BIT] = 2,
diff --git a/include/asm-sh/cpu-sh4/dma-sh7780.h b/include/asm-sh/cpu-sh4/dma-sh7780.h
index 6c90d28331b..71b426a6e48 100644
--- a/include/asm-sh/cpu-sh4/dma-sh7780.h
+++ b/include/asm-sh/cpu-sh4/dma-sh7780.h
@@ -28,7 +28,7 @@ enum {
/*
* The DMA count is defined as the number of bytes to transfer.
*/
-static unsigned int __attribute__ ((used)) ts_shift[] = {
+static unsigned int ts_shift[] __maybe_unused = {
[XMIT_SZ_8BIT] = 0,
[XMIT_SZ_16BIT] = 1,
[XMIT_SZ_32BIT] = 2,
diff --git a/include/asm-sh/cpu-sh4/dma.h b/include/asm-sh/cpu-sh4/dma.h
index c135e9cebd9..36e26a96476 100644
--- a/include/asm-sh/cpu-sh4/dma.h
+++ b/include/asm-sh/cpu-sh4/dma.h
@@ -53,7 +53,7 @@ enum {
/*
* The DMA count is defined as the number of bytes to transfer.
*/
-static unsigned int ts_shift[] __attribute__ ((used)) = {
+static unsigned int ts_shift[] __maybe_unused = {
[XMIT_SZ_64BIT] = 3,
[XMIT_SZ_8BIT] = 0,
[XMIT_SZ_16BIT] = 1,
diff --git a/include/asm-sparc/smp.h b/include/asm-sparc/smp.h
index b9da9a600e3..b3f492208fd 100644
--- a/include/asm-sparc/smp.h
+++ b/include/asm-sparc/smp.h
@@ -165,6 +165,7 @@ void smp_setup_cpu_possible_map(void);
#else /* SMP */
+#define hard_smp_processor_id() 0
#define smp_setup_cpu_possible_map() do { } while (0)
#endif /* !(SMP) */
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index cca54804b72..869d16fb907 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -48,6 +48,7 @@ extern unsigned char boot_cpu_id;
#else
+#define hard_smp_processor_id() 0
#define smp_setup_cpu_possible_map() do { } while (0)
#define boot_cpu_id (0)
diff --git a/include/asm-um/required-features.h b/include/asm-um/required-features.h
new file mode 100644
index 00000000000..dfb967b2d2f
--- /dev/null
+++ b/include/asm-um/required-features.h
@@ -0,0 +1,9 @@
+#ifndef __UM_REQUIRED_FEATURES_H
+#define __UM_REQUIRED_FEATURES_H
+
+/*
+ * Nothing to see, just need something for the i386 and x86_64 asm
+ * headers to include.
+ */
+
+#endif
diff --git a/include/asm-um/smp.h b/include/asm-um/smp.h
index ca552261ed1..84f8cf29324 100644
--- a/include/asm-um/smp.h
+++ b/include/asm-um/smp.h
@@ -24,6 +24,10 @@ extern inline void smp_cpus_done(unsigned int maxcpus)
extern struct task_struct *idle_threads[NR_CPUS];
+#else
+
+#define hard_smp_processor_id() 0
+
#endif
#endif
diff --git a/include/asm-x86_64/smp.h b/include/asm-x86_64/smp.h
index d5704421456..3f303d2365e 100644
--- a/include/asm-x86_64/smp.h
+++ b/include/asm-x86_64/smp.h
@@ -57,12 +57,6 @@ static inline int num_booting_cpus(void)
#define raw_smp_processor_id() read_pda(cpunumber)
-static inline int hard_smp_processor_id(void)
-{
- /* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
-}
-
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void prefill_possible_map(void);
@@ -71,7 +65,13 @@ extern unsigned __cpuinitdata disabled_cpus;
#define NO_PROC_ID 0xFF /* No processor magic marker */
-#endif
+#endif /* CONFIG_SMP */
+
+static inline int hard_smp_processor_id(void)
+{
+ /* we don't want to mark this access volatile - bad code generation */
+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
+}
/*
* Some lowlevel functions might want to know about
diff --git a/include/asm-x86_64/system.h b/include/asm-x86_64/system.h
index b7b8021e8c4..ead9f9a5623 100644
--- a/include/asm-x86_64/system.h
+++ b/include/asm-x86_64/system.h
@@ -39,7 +39,7 @@
[threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
[ti_flags] "i" (offsetof(struct thread_info, flags)),\
[tif_fork] "i" (TIF_FORK), \
- [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
+ [thread_info] "i" (offsetof(struct task_struct, stack)), \
[pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
: "memory", "cc" __EXTRA_CLOBBER)
diff --git a/include/asm-x86_64/thread_info.h b/include/asm-x86_64/thread_info.h
index 74a6c74397f..10bb5a8ed68 100644
--- a/include/asm-x86_64/thread_info.h
+++ b/include/asm-x86_64/thread_info.h
@@ -162,7 +162,7 @@ static inline struct thread_info *stack_thread_info(void)
#define TS_COMPAT 0x0002 /* 32bit syscall active */
#define TS_POLLING 0x0004 /* true if in idle loop and not sleeping */
-#define tsk_is_polling(t) ((t)->thread_info->status & TS_POLLING)
+#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
#endif /* __KERNEL__ */
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a30ef13c9e6..43dc2ebfaa0 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -226,7 +226,8 @@ int FASTCALL(io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
__put_ioctx(kioctx); \
} while (0)
-#define in_aio() !is_sync_wait(current->io_wait)
+#define in_aio() (unlikely(!is_sync_wait(current->io_wait)))
+
/* may be used for debugging */
#define warn_if_async() \
do { \
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a686eabe22d..db5b00a792f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -854,7 +854,7 @@ static inline void put_dev_sector(Sector p)
struct work_struct;
int kblockd_schedule_work(struct work_struct *work);
-void kblockd_flush(void);
+void kblockd_flush_work(struct work_struct *work);
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 2665ca04cf8..bf297b03a4e 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -49,6 +49,7 @@ struct clocksource;
* @shift: cycle to nanosecond divisor (power of two)
* @flags: flags describing special properties
* @vread: vsyscall based read
+ * @resume: resume function for the clocksource, if necessary
* @cycle_interval: Used internally by timekeeping core, please ignore.
* @xtime_interval: Used internally by timekeeping core, please ignore.
*/
@@ -65,6 +66,7 @@ struct clocksource {
u32 shift;
unsigned long flags;
cycle_t (*vread)(void);
+ void (*resume)(void);
/* timekeeping specific data, ignore */
cycle_t cycle_interval;
@@ -209,6 +211,7 @@ static inline void clocksource_calculate_interval(struct clocksource *c,
extern int clocksource_register(struct clocksource*);
extern struct clocksource* clocksource_get_next(void);
extern void clocksource_change_rating(struct clocksource *cs, int rating);
+extern void clocksource_resume(void);
#ifdef CONFIG_GENERIC_TIME_VSYSCALL
extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
diff --git a/include/linux/compat.h b/include/linux/compat.h
index ccd863dd77f..70a157a130b 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -253,5 +253,8 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
const compat_sigset_t __user *sigmask,
compat_size_t sigsetsize);
+asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename,
+ struct compat_timespec __user *t, int flags);
+
#endif /* CONFIG_COMPAT */
#endif /* _LINUX_COMPAT_H */
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index a9f794716a8..03ec2311fb2 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -40,3 +40,4 @@
#define noinline __attribute__((noinline))
#define __attribute_pure__ __attribute__((pure))
#define __attribute_const__ __attribute__((__const__))
+#define __maybe_unused __attribute__((unused))
diff --git a/include/linux/compiler-gcc3.h b/include/linux/compiler-gcc3.h
index ecd621fd27d..a9e2863c2db 100644
--- a/include/linux/compiler-gcc3.h
+++ b/include/linux/compiler-gcc3.h
@@ -4,9 +4,11 @@
#include <linux/compiler-gcc.h>
#if __GNUC_MINOR__ >= 3
-# define __attribute_used__ __attribute__((__used__))
+# define __used __attribute__((__used__))
+# define __attribute_used__ __used /* deprecated */
#else
-# define __attribute_used__ __attribute__((__unused__))
+# define __used __attribute__((__unused__))
+# define __attribute_used__ __used /* deprecated */
#endif
#if __GNUC_MINOR__ >= 4
diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h
index fd0cc7c4a63..a03e9398a6c 100644
--- a/include/linux/compiler-gcc4.h
+++ b/include/linux/compiler-gcc4.h
@@ -12,7 +12,8 @@
# define __inline __inline __attribute__((always_inline))
#endif
-#define __attribute_used__ __attribute__((__used__))
+#define __used __attribute__((__used__))
+#define __attribute_used__ __used /* deprecated */
#define __must_check __attribute__((warn_unused_result))
#define __compiler_offsetof(a,b) __builtin_offsetof(a,b)
#define __always_inline inline __attribute__((always_inline))
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 3b6949b4174..498c3592076 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -108,15 +108,30 @@ extern void __chk_io_ptr(const void __iomem *);
* Allow us to avoid 'defined but not used' warnings on functions and data,
* as well as force them to be emitted to the assembly file.
*
- * As of gcc 3.3, static functions that are not marked with attribute((used))
- * may be elided from the assembly file. As of gcc 3.3, static data not so
+ * As of gcc 3.4, static functions that are not marked with attribute((used))
+ * may be elided from the assembly file. As of gcc 3.4, static data not so
* marked will not be elided, but this may change in a future gcc version.
*
+ * NOTE: Because distributions shipped with a backported unit-at-a-time
+ * compiler in gcc 3.3, we must define __used to be __attribute__((used))
+ * for gcc >=3.3 instead of 3.4.
+ *
* In prior versions of gcc, such functions and data would be emitted, but
* would be warned about except with attribute((unused)).
+ *
+ * Mark functions that are referenced only in inline assembly as __used so
+ * the code is emitted even though it appears to be unreferenced.
*/
#ifndef __attribute_used__
-# define __attribute_used__ /* unimplemented */
+# define __attribute_used__ /* deprecated */
+#endif
+
+#ifndef __used
+# define __used /* unimplemented */
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused /* unimplemented */
#endif
/*
diff --git a/include/linux/fb.h b/include/linux/fb.h
index dff7a728948..c654d0e9ce3 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -868,7 +868,7 @@ struct fb_info {
#define fb_writeq sbus_writeq
#define fb_memset sbus_memset_io
-#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__)
+#elif defined(__i386__) || defined(__alpha__) || defined(__x86_64__) || defined(__hppa__) || (defined(__sh__) && !defined(__SH5__)) || defined(__powerpc__) || defined(__avr32__)
#define fb_readb __raw_readb
#define fb_readw __raw_readw
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 820125c628c..899fc7f20ed 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -3,6 +3,8 @@
#include <linux/sched.h>
+union ktime;
+
/* Second argument to futex syscall */
@@ -15,6 +17,19 @@
#define FUTEX_LOCK_PI 6
#define FUTEX_UNLOCK_PI 7
#define FUTEX_TRYLOCK_PI 8
+#define FUTEX_CMP_REQUEUE_PI 9
+
+#define FUTEX_PRIVATE_FLAG 128
+#define FUTEX_CMD_MASK ~FUTEX_PRIVATE_FLAG
+
+#define FUTEX_WAIT_PRIVATE (FUTEX_WAIT | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAKE_PRIVATE (FUTEX_WAKE | FUTEX_PRIVATE_FLAG)
+#define FUTEX_REQUEUE_PRIVATE (FUTEX_REQUEUE | FUTEX_PRIVATE_FLAG)
+#define FUTEX_CMP_REQUEUE_PRIVATE (FUTEX_CMP_REQUEUE | FUTEX_PRIVATE_FLAG)
+#define FUTEX_WAKE_OP_PRIVATE (FUTEX_WAKE_OP | FUTEX_PRIVATE_FLAG)
+#define FUTEX_LOCK_PI_PRIVATE (FUTEX_LOCK_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_UNLOCK_PI_PRIVATE (FUTEX_UNLOCK_PI | FUTEX_PRIVATE_FLAG)
+#define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG)
/*
* Support for robust futexes: the kernel cleans up held futexes at
@@ -83,9 +98,14 @@ struct robust_list_head {
#define FUTEX_OWNER_DIED 0x40000000
/*
+ * Some processes have been requeued on this PI-futex
+ */
+#define FUTEX_WAITER_REQUEUED 0x20000000
+
+/*
* The rest of the robust-futex field is for the TID:
*/
-#define FUTEX_TID_MASK 0x3fffffff
+#define FUTEX_TID_MASK 0x0fffffff
/*
* This limit protects against a deliberately circular list.
@@ -94,7 +114,7 @@ struct robust_list_head {
#define ROBUST_LIST_LIMIT 2048
#ifdef __KERNEL__
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, union ktime *timeout,
u32 __user *uaddr2, u32 val2, u32 val3);
extern int
@@ -106,9 +126,20 @@ handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi);
* Don't rearrange members without looking at hash_futex().
*
* offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
+ * We use the two low order bits of offset to tell what is the kind of key :
+ * 00 : Private process futex (PTHREAD_PROCESS_PRIVATE)
+ * (no reference on an inode or mm)
+ * 01 : Shared futex (PTHREAD_PROCESS_SHARED)
+ * mapped on a file (reference on the underlying inode)
+ * 10 : Shared futex (PTHREAD_PROCESS_SHARED)
+ * (but private mapping on an mm, and reference taken on it)
+*/
+
+#define FUT_OFF_INODE 1 /* We set bit 0 if key has a reference on inode */
+#define FUT_OFF_MMSHARED 2 /* We set bit 1 if key has a reference on mm */
+
union futex_key {
+ u32 __user *uaddr;
struct {
unsigned long pgoff;
struct inode *inode;
@@ -125,7 +156,8 @@ union futex_key {
int offset;
} both;
};
-int get_futex_key(u32 __user *uaddr, union futex_key *key);
+int get_futex_key(u32 __user *uaddr, struct rw_semaphore *shared,
+ union futex_key *key);
void get_futex_key_refs(union futex_key *key);
void drop_futex_key_refs(union futex_key *key);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 2c65da7cabb..f589559cf07 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -413,6 +413,7 @@ char *disk_name (struct gendisk *hd, int part, char *buf);
extern int rescan_partitions(struct gendisk *disk, struct block_device *bdev);
extern void add_partition(struct gendisk *, int, sector_t, sector_t, int);
extern void delete_partition(struct gendisk *, int);
+extern void printk_all_partitions(void);
extern struct gendisk *alloc_disk_node(int minors, int node_id);
extern struct gendisk *alloc_disk(int minors);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 97a36c3d96e..0d2ef0b082a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -176,10 +176,6 @@ extern void FASTCALL(free_cold_page(struct page *page));
#define free_page(addr) free_pages((addr),0)
void page_alloc_init(void);
-#ifdef CONFIG_NUMA
-void drain_node_pages(int node);
-#else
-static inline void drain_node_pages(int node) { };
-#endif
+void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
#endif /* __LINUX_GFP_H */
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index a515eb0afdf..98e2cce996a 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -94,17 +94,26 @@ static inline void clear_highpage(struct page *page)
/*
* Same but also flushes aliased cache contents to RAM.
+ *
+ * This must be a macro because KM_USER0 and friends aren't defined if
+ * !CONFIG_HIGHMEM
*/
-static inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
+#define zero_user_page(page, offset, size, km_type) \
+ do { \
+ void *kaddr; \
+ \
+ BUG_ON((offset) + (size) > PAGE_SIZE); \
+ \
+ kaddr = kmap_atomic(page, km_type); \
+ memset((char *)kaddr + (offset), 0, (size)); \
+ flush_dcache_page(page); \
+ kunmap_atomic(kaddr, (km_type)); \
+ } while (0)
+
+static inline void __deprecated memclear_highpage_flush(struct page *page,
+ unsigned int offset, unsigned int size)
{
- void *kaddr;
-
- BUG_ON(offset + size > PAGE_SIZE);
-
- kaddr = kmap_atomic(page, KM_USER0);
- memset((char *)kaddr + offset, 0, size);
- flush_dcache_page(page);
- kunmap_atomic(kaddr, KM_USER0);
+ zero_user_page(page, offset, size, KM_USER0);
}
#ifndef __HAVE_ARCH_COPY_USER_HIGHPAGE
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 795102309bf..45170b2fa25 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -95,7 +95,7 @@ extern struct group_info init_groups;
#define INIT_TASK(tsk) \
{ \
.state = 0, \
- .thread_info = &init_thread_info, \
+ .stack = &init_thread_info, \
.usage = ATOMIC_INIT(2), \
.flags = 0, \
.lock_depth = -1, \
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index 1c65e7a9f18..00dd957e245 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -30,4 +30,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu);
int kthread_stop(struct task_struct *k);
int kthread_should_stop(void);
+int kthreadd(void *unused);
+extern struct task_struct *kthreadd_task;
+
#endif /* _LINUX_KTHREAD_H */
diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index 81bb9c7a4eb..c762954bda1 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -43,7 +43,7 @@
* plain scalar nanosecond based representation can be selected by the
* config switch CONFIG_KTIME_SCALAR.
*/
-typedef union {
+union ktime {
s64 tv64;
#if BITS_PER_LONG != 64 && !defined(CONFIG_KTIME_SCALAR)
struct {
@@ -54,7 +54,9 @@ typedef union {
# endif
} tv;
#endif
-} ktime_t;
+};
+
+typedef union ktime ktime_t; /* Kill this */
#define KTIME_MAX ((s64)~((u64)1 << 63))
#if (BITS_PER_LONG == 64)
diff --git a/include/linux/mca.h b/include/linux/mca.h
index 5cff2923092..37972704617 100644
--- a/include/linux/mca.h
+++ b/include/linux/mca.h
@@ -94,6 +94,7 @@ struct mca_bus {
struct mca_driver {
const short *id_table;
void *driver_data;
+ int integrated_id;
struct device_driver driver;
};
#define to_mca_driver(mdriver) container_of(mdriver, struct mca_driver, driver)
@@ -125,6 +126,7 @@ extern enum MCA_AdapterStatus mca_device_status(struct mca_device *mca_dev);
extern struct bus_type mca_bus_type;
extern int mca_register_driver(struct mca_driver *drv);
+extern int mca_register_driver_integrated(struct mca_driver *, int);
extern void mca_unregister_driver(struct mca_driver *drv);
/* WARNING: only called by the boot time device setup */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 2f1544e8304..d09b1345a3a 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -83,6 +83,9 @@ struct per_cpu_pages {
struct per_cpu_pageset {
struct per_cpu_pages pcp[2]; /* 0: hot. 1: cold */
+#ifdef CONFIG_NUMA
+ s8 expire;
+#endif
#ifdef CONFIG_SMP
s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
diff --git a/include/linux/module.h b/include/linux/module.h
index 6d3dc9c4ff9..792d483c9af 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -356,6 +356,9 @@ struct module
keeping pointers to this stuff */
char *args;
};
+#ifndef MODULE_ARCH_INIT
+#define MODULE_ARCH_INIT {}
+#endif
/* FIXME: It'd be nice to isolate modules during init, too, so they
aren't used before they (may) fail. But presently too much code
diff --git a/include/linux/mutex.h b/include/linux/mutex.h
index b81bc2adaef..0d50ea3df68 100644
--- a/include/linux/mutex.h
+++ b/include/linux/mutex.h
@@ -121,11 +121,12 @@ static inline int fastcall mutex_is_locked(struct mutex *lock)
* Also see Documentation/mutex-design.txt.
*/
extern void fastcall mutex_lock(struct mutex *lock);
-extern int fastcall mutex_lock_interruptible(struct mutex *lock);
+extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
-extern int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass);
+extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock,
+ unsigned int subclass);
#else
# define mutex_lock_nested(lock, subclass) mutex_lock(lock)
# define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
diff --git a/include/linux/nfs4_acl.h b/include/linux/nfs4_acl.h
index 409b6e02f33..c9c05a78e9b 100644
--- a/include/linux/nfs4_acl.h
+++ b/include/linux/nfs4_acl.h
@@ -44,7 +44,6 @@
#define NFS4_ACL_MAX 170
struct nfs4_acl *nfs4_acl_new(int);
-void nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, u32, int, uid_t);
int nfs4_acl_get_whotype(char *, u32);
int nfs4_acl_write_who(int who, char *p);
int nfs4_acl_permission(struct nfs4_acl *acl, uid_t owner, gid_t group,
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 10a43ed0527..9431101bf87 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -112,32 +112,40 @@ extern void srcu_init_notifier_head(struct srcu_notifier_head *nh);
#ifdef __KERNEL__
-extern int atomic_notifier_chain_register(struct atomic_notifier_head *,
- struct notifier_block *);
-extern int blocking_notifier_chain_register(struct blocking_notifier_head *,
- struct notifier_block *);
-extern int raw_notifier_chain_register(struct raw_notifier_head *,
- struct notifier_block *);
-extern int srcu_notifier_chain_register(struct srcu_notifier_head *,
- struct notifier_block *);
-
-extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *,
- struct notifier_block *);
-extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *,
- struct notifier_block *);
-extern int raw_notifier_chain_unregister(struct raw_notifier_head *,
- struct notifier_block *);
-extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *,
- struct notifier_block *);
-
-extern int atomic_notifier_call_chain(struct atomic_notifier_head *,
+extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh,
+ struct notifier_block *nb);
+extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh,
+ struct notifier_block *nb);
+extern int raw_notifier_chain_register(struct raw_notifier_head *nh,
+ struct notifier_block *nb);
+extern int srcu_notifier_chain_register(struct srcu_notifier_head *nh,
+ struct notifier_block *nb);
+
+extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh,
+ struct notifier_block *nb);
+extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh,
+ struct notifier_block *nb);
+extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh,
+ struct notifier_block *nb);
+extern int srcu_notifier_chain_unregister(struct srcu_notifier_head *nh,
+ struct notifier_block *nb);
+
+extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh,
unsigned long val, void *v);
-extern int blocking_notifier_call_chain(struct blocking_notifier_head *,
+extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh,
+ unsigned long val, void *v, int nr_to_call, int *nr_calls);
+extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh,
unsigned long val, void *v);
-extern int raw_notifier_call_chain(struct raw_notifier_head *,
+extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh,
+ unsigned long val, void *v, int nr_to_call, int *nr_calls);
+extern int raw_notifier_call_chain(struct raw_notifier_head *nh,
unsigned long val, void *v);
-extern int srcu_notifier_call_chain(struct srcu_notifier_head *,
+extern int __raw_notifier_call_chain(struct raw_notifier_head *nh,
+ unsigned long val, void *v, int nr_to_call, int *nr_calls);
+extern int srcu_notifier_call_chain(struct srcu_notifier_head *nh,
unsigned long val, void *v);
+extern int __srcu_notifier_call_chain(struct srcu_notifier_head *nh,
+ unsigned long val, void *v, int nr_to_call, int *nr_calls);
#define NOTIFY_DONE 0x0000 /* Don't care */
#define NOTIFY_OK 0x0001 /* Suits me */
@@ -186,6 +194,20 @@ extern int srcu_notifier_call_chain(struct srcu_notifier_head *,
#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
+#define CPU_LOCK_ACQUIRE 0x0008 /* Acquire all hotcpu locks */
+#define CPU_LOCK_RELEASE 0x0009 /* Release all hotcpu locks */
+
+/* Used for CPU hotplug events occuring while tasks are frozen due to a suspend
+ * operation in progress
+ */
+#define CPU_TASKS_FROZEN 0x0010
+
+#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
#endif /* __KERNEL__ */
#endif /* _LINUX_NOTIFIER_H */
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 6e8fa3049e5..87545e0f0b5 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -107,26 +107,11 @@ typedef int __bitwise suspend_state_t;
#define PM_SUSPEND_ON ((__force suspend_state_t) 0)
#define PM_SUSPEND_STANDBY ((__force suspend_state_t) 1)
#define PM_SUSPEND_MEM ((__force suspend_state_t) 3)
-#define PM_SUSPEND_DISK ((__force suspend_state_t) 4)
-#define PM_SUSPEND_MAX ((__force suspend_state_t) 5)
-
-typedef int __bitwise suspend_disk_method_t;
-
-/* invalid must be 0 so struct pm_ops initialisers can leave it out */
-#define PM_DISK_INVALID ((__force suspend_disk_method_t) 0)
-#define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 1)
-#define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 2)
-#define PM_DISK_REBOOT ((__force suspend_disk_method_t) 3)
-#define PM_DISK_TEST ((__force suspend_disk_method_t) 4)
-#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 5)
-#define PM_DISK_MAX ((__force suspend_disk_method_t) 6)
+#define PM_SUSPEND_MAX ((__force suspend_state_t) 4)
/**
* struct pm_ops - Callbacks for managing platform dependent suspend states.
* @valid: Callback to determine whether the given state can be entered.
- * If %CONFIG_SOFTWARE_SUSPEND is set then %PM_SUSPEND_DISK is
- * always valid and never passed to this call. If not assigned,
- * no suspend states are valid.
* Valid states are advertised in /sys/power/state but can still
* be rejected by prepare or enter if the conditions aren't right.
* There is a %pm_valid_only_mem function available that can be assigned
@@ -140,24 +125,12 @@ typedef int __bitwise suspend_disk_method_t;
*
* @finish: Called when the system has left the given state and all devices
* are resumed. The return value is ignored.
- *
- * @pm_disk_mode: The generic code always allows one of the shutdown methods
- * %PM_DISK_SHUTDOWN, %PM_DISK_REBOOT, %PM_DISK_TEST and
- * %PM_DISK_TESTPROC. If this variable is set, the mode it is set
- * to is allowed in addition to those modes and is also made default.
- * When this mode is sent selected, the @prepare call will be called
- * before suspending to disk (if present), the @enter call should be
- * present and will be called after all state has been saved and the
- * machine is ready to be powered off; the @finish callback is called
- * after state has been restored. All these calls are called with
- * %PM_SUSPEND_DISK as the state.
*/
struct pm_ops {
int (*valid)(suspend_state_t state);
int (*prepare)(suspend_state_t state);
int (*enter)(suspend_state_t state);
int (*finish)(suspend_state_t state);
- suspend_disk_method_t pm_disk_mode;
};
/**
@@ -276,8 +249,6 @@ extern void device_power_up(void);
extern void device_resume(void);
#ifdef CONFIG_PM
-extern suspend_disk_method_t pm_disk_mode;
-
extern int device_suspend(pm_message_t state);
extern int device_prepare_suspend(pm_message_t state);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index de72c49747c..a121f36f443 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -201,7 +201,6 @@ struct mddev_s
struct mutex reconfig_mutex;
atomic_t active;
- int changed; /* true if we might need to reread partition info */
int degraded; /* whether md should consider
* adding a spare
*/
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 759a0f97bec..6cd8c4425fc 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -12,6 +12,7 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/timer.h>
#include <linux/wait.h>
#include <linux/list.h>
#include <linux/fs.h>
@@ -38,7 +39,7 @@ struct rchan_buf
size_t subbufs_consumed; /* count of sub-buffers consumed */
struct rchan *chan; /* associated channel */
wait_queue_head_t read_wait; /* reader wait queue */
- struct delayed_work wake_readers; /* reader wake-up work struct */
+ struct timer_list timer; /* reader wake-up timer */
struct dentry *dentry; /* channel file dentry */
struct kref kref; /* channel buffer refcount */
struct page **page_array; /* array of current buffer pages */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3d95c480f58..17b72d88c4c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -817,7 +817,7 @@ struct prio_array;
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
- struct thread_info *thread_info;
+ void *stack;
atomic_t usage;
unsigned int flags; /* per process flags, defined below */
unsigned int ptrace;
@@ -1317,6 +1317,7 @@ extern int in_egroup_p(gid_t);
extern void proc_caches_init(void);
extern void flush_signals(struct task_struct *);
+extern void ignore_signals(struct task_struct *);
extern void flush_signal_handlers(struct task_struct *, int force_default);
extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info);
@@ -1512,8 +1513,8 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
#ifndef __HAVE_THREAD_FUNCTIONS
-#define task_thread_info(task) (task)->thread_info
-#define task_stack_page(task) ((void*)((task)->thread_info))
+#define task_thread_info(task) ((struct thread_info *)(task)->stack)
+#define task_stack_page(task) ((task)->stack)
static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
@@ -1523,7 +1524,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
static inline unsigned long *end_of_stack(struct task_struct *p)
{
- return (unsigned long *)(p->thread_info + 1);
+ return (unsigned long *)(task_thread_info(p) + 1);
}
#endif
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 14749056dd6..3fa0fab4a04 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -243,6 +243,131 @@ extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka,
extern struct kmem_cache *sighand_cachep;
+/*
+ * In POSIX a signal is sent either to a specific thread (Linux task)
+ * or to the process as a whole (Linux thread group). How the signal
+ * is sent determines whether it's to one thread or the whole group,
+ * which determines which signal mask(s) are involved in blocking it
+ * from being delivered until later. When the signal is delivered,
+ * either it's caught or ignored by a user handler or it has a default
+ * effect that applies to the whole thread group (POSIX process).
+ *
+ * The possible effects an unblocked signal set to SIG_DFL can have are:
+ * ignore - Nothing Happens
+ * terminate - kill the process, i.e. all threads in the group,
+ * similar to exit_group. The group leader (only) reports
+ * WIFSIGNALED status to its parent.
+ * coredump - write a core dump file describing all threads using
+ * the same mm and then kill all those threads
+ * stop - stop all the threads in the group, i.e. TASK_STOPPED state
+ *
+ * SIGKILL and SIGSTOP cannot be caught, blocked, or ignored.
+ * Other signals when not blocked and set to SIG_DFL behaves as follows.
+ * The job control signals also have other special effects.
+ *
+ * +--------------------+------------------+
+ * | POSIX signal | default action |
+ * +--------------------+------------------+
+ * | SIGHUP | terminate |
+ * | SIGINT | terminate |
+ * | SIGQUIT | coredump |
+ * | SIGILL | coredump |
+ * | SIGTRAP | coredump |
+ * | SIGABRT/SIGIOT | coredump |
+ * | SIGBUS | coredump |
+ * | SIGFPE | coredump |
+ * | SIGKILL | terminate(+) |
+ * | SIGUSR1 | terminate |
+ * | SIGSEGV | coredump |
+ * | SIGUSR2 | terminate |
+ * | SIGPIPE | terminate |
+ * | SIGALRM | terminate |
+ * | SIGTERM | terminate |
+ * | SIGCHLD | ignore |
+ * | SIGCONT | ignore(*) |
+ * | SIGSTOP | stop(*)(+) |
+ * | SIGTSTP | stop(*) |
+ * | SIGTTIN | stop(*) |
+ * | SIGTTOU | stop(*) |
+ * | SIGURG | ignore |
+ * | SIGXCPU | coredump |
+ * | SIGXFSZ | coredump |
+ * | SIGVTALRM | terminate |
+ * | SIGPROF | terminate |
+ * | SIGPOLL/SIGIO | terminate |
+ * | SIGSYS/SIGUNUSED | coredump |
+ * | SIGSTKFLT | terminate |
+ * | SIGWINCH | ignore |
+ * | SIGPWR | terminate |
+ * | SIGRTMIN-SIGRTMAX | terminate |
+ * +--------------------+------------------+
+ * | non-POSIX signal | default action |
+ * +--------------------+------------------+
+ * | SIGEMT | coredump |
+ * +--------------------+------------------+
+ *
+ * (+) For SIGKILL and SIGSTOP the action is "always", not just "default".
+ * (*) Special job control effects:
+ * When SIGCONT is sent, it resumes the process (all threads in the group)
+ * from TASK_STOPPED state and also clears any pending/queued stop signals
+ * (any of those marked with "stop(*)"). This happens regardless of blocking,
+ * catching, or ignoring SIGCONT. When any stop signal is sent, it clears
+ * any pending/queued SIGCONT signals; this happens regardless of blocking,
+ * catching, or ignored the stop signal, though (except for SIGSTOP) the
+ * default action of stopping the process may happen later or never.
+ */
+
+#ifdef SIGEMT
+#define SIGEMT_MASK rt_sigmask(SIGEMT)
+#else
+#define SIGEMT_MASK 0
+#endif
+
+#if SIGRTMIN > BITS_PER_LONG
+#define rt_sigmask(sig) (1ULL << ((sig)-1))
+#else
+#define rt_sigmask(sig) sigmask(sig)
+#endif
+#define siginmask(sig, mask) (rt_sigmask(sig) & (mask))
+
+#define SIG_KERNEL_ONLY_MASK (\
+ rt_sigmask(SIGKILL) | rt_sigmask(SIGSTOP))
+
+#define SIG_KERNEL_STOP_MASK (\
+ rt_sigmask(SIGSTOP) | rt_sigmask(SIGTSTP) | \
+ rt_sigmask(SIGTTIN) | rt_sigmask(SIGTTOU) )
+
+#define SIG_KERNEL_COREDUMP_MASK (\
+ rt_sigmask(SIGQUIT) | rt_sigmask(SIGILL) | \
+ rt_sigmask(SIGTRAP) | rt_sigmask(SIGABRT) | \
+ rt_sigmask(SIGFPE) | rt_sigmask(SIGSEGV) | \
+ rt_sigmask(SIGBUS) | rt_sigmask(SIGSYS) | \
+ rt_sigmask(SIGXCPU) | rt_sigmask(SIGXFSZ) | \
+ SIGEMT_MASK )
+
+#define SIG_KERNEL_IGNORE_MASK (\
+ rt_sigmask(SIGCONT) | rt_sigmask(SIGCHLD) | \
+ rt_sigmask(SIGWINCH) | rt_sigmask(SIGURG) )
+
+#define sig_kernel_only(sig) \
+ (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_ONLY_MASK))
+#define sig_kernel_coredump(sig) \
+ (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_COREDUMP_MASK))
+#define sig_kernel_ignore(sig) \
+ (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_IGNORE_MASK))
+#define sig_kernel_stop(sig) \
+ (((sig) < SIGRTMIN) && siginmask(sig, SIG_KERNEL_STOP_MASK))
+
+#define sig_needs_tasklist(sig) ((sig) == SIGCONT)
+
+#define sig_user_defined(t, signr) \
+ (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \
+ ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN))
+
+#define sig_fatal(t, signr) \
+ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \
+ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL)
+
#endif /* __KERNEL__ */
#endif /* _LINUX_SIGNAL_H */
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 7ba23ec8211..3f70149eabb 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -83,7 +83,6 @@ void smp_prepare_boot_cpu(void);
* These macros fold the SMP functionality into a single CPU system
*/
#define raw_smp_processor_id() 0
-#define hard_smp_processor_id() 0
static inline int up_smp_call_function(void)
{
return 0;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 35fa4d5aadd..4a7ae8ab6eb 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -396,4 +396,23 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t);
#define RPC_MAX_ADDRBUFLEN (63U)
+/*
+ * When we want to reduce the size of the reserved space in the response
+ * buffer, we need to take into account the size of any checksum data that
+ * may be at the end of the packet. This is difficult to determine exactly
+ * for all cases without actually generating the checksum, so we just use a
+ * static value.
+ */
+static inline void
+svc_reserve_auth(struct svc_rqst *rqstp, int space)
+{
+ int added_space = 0;
+
+ switch(rqstp->rq_authop->flavour) {
+ case RPC_AUTH_GSS:
+ added_space = RPC_MAX_AUTH_SIZE;
+ }
+ return svc_reserve(rqstp, space + added_space);
+}
+
#endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index 7909687557b..e21dd93ac4b 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -37,7 +37,8 @@ struct svc_sock {
atomic_t sk_reserved; /* space on outq that is reserved */
- spinlock_t sk_defer_lock; /* protects sk_deferred */
+ spinlock_t sk_lock; /* protects sk_deferred and
+ * sk_info_authunix */
struct list_head sk_deferred; /* deferred requests that need to
* be revisted */
struct mutex sk_mutex; /* to serialize sending data */
diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 9d2aa1a12aa..d74da9122b6 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -32,6 +32,24 @@ static inline int pm_prepare_console(void) { return 0; }
static inline void pm_restore_console(void) {}
#endif
+/**
+ * struct hibernation_ops - hibernation platform support
+ *
+ * The methods in this structure allow a platform to override the default
+ * mechanism of shutting down the machine during a hibernation transition.
+ *
+ * All three methods must be assigned.
+ *
+ * @prepare: prepare system for hibernation
+ * @enter: shut down system after state has been saved to disk
+ * @finish: finish/clean up after state has been reloaded
+ */
+struct hibernation_ops {
+ int (*prepare)(void);
+ int (*enter)(void);
+ void (*finish)(void);
+};
+
#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
/* kernel/power/snapshot.c */
extern void __init register_nosave_region(unsigned long, unsigned long);
@@ -39,11 +57,17 @@ extern int swsusp_page_is_forbidden(struct page *);
extern void swsusp_set_page_free(struct page *);
extern void swsusp_unset_page_free(struct page *);
extern unsigned long get_safe_page(gfp_t gfp_mask);
+
+extern void hibernation_set_ops(struct hibernation_ops *ops);
+extern int hibernate(void);
#else
static inline void register_nosave_region(unsigned long b, unsigned long e) {}
static inline int swsusp_page_is_forbidden(struct page *p) { return 0; }
static inline void swsusp_set_page_free(struct page *p) {}
static inline void swsusp_unset_page_free(struct page *p) {}
+
+static inline void hibernation_set_ops(struct hibernation_ops *ops) {}
+static inline int hibernate(void) { return -ENOSYS; }
#endif /* defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND) */
void save_processor_state(void);
diff --git a/include/linux/svga.h b/include/linux/svga.h
index e1cc552e04f..13ad0b82ac2 100644
--- a/include/linux/svga.h
+++ b/include/linux/svga.h
@@ -113,6 +113,8 @@ void svga_tilefill(struct fb_info *info, struct fb_tilerect *rect);
void svga_tileblit(struct fb_info *info, struct fb_tileblit *blit);
void svga_tilecursor(struct fb_info *info, struct fb_tilecursor *cursor);
int svga_get_tilemax(struct fb_info *info);
+void svga_get_caps(struct fb_info *info, struct fb_blit_caps *caps,
+ struct fb_var_screeninfo *var);
int svga_compute_pll(const struct svga_pll *pll, u32 f_wanted, u16 *m, u16 *n, u16 *r, int node);
int svga_check_timings(const struct svga_timing_regs *tm, struct fb_var_screeninfo *var, int node);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 1912c6cbef5..3139f441229 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -576,6 +576,8 @@ asmlinkage long sys_fstatat64(int dfd, char __user *filename,
struct stat64 __user *statbuf, int flag);
asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *buf,
int bufsiz);
+asmlinkage long sys_utimensat(int dfd, char __user *filename,
+ struct timespec __user *utimes, int flags);
asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename,
struct compat_timeval __user *t);
asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index acb1f105870..d9325cf8a13 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -212,8 +212,6 @@ extern void dec_zone_state(struct zone *, enum zone_stat_item);
extern void __dec_zone_state(struct zone *, enum zone_stat_item);
void refresh_cpu_vm_stats(int);
-void refresh_vm_stats(void);
-
#else /* CONFIG_SMP */
/*
@@ -260,7 +258,6 @@ static inline void __dec_zone_page_state(struct page *page,
#define mod_zone_page_state __mod_zone_page_state
static inline void refresh_cpu_vm_stats(int cpu) { }
-static inline void refresh_vm_stats(void) { }
#endif
#endif /* _LINUX_VMSTAT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index f16ba1e0687..d555f31c074 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -24,15 +24,13 @@ typedef void (*work_func_t)(struct work_struct *work);
struct work_struct {
atomic_long_t data;
#define WORK_STRUCT_PENDING 0 /* T if work item pending execution */
-#define WORK_STRUCT_NOAUTOREL 1 /* F if work item automatically released on exec */
#define WORK_STRUCT_FLAG_MASK (3UL)
#define WORK_STRUCT_WQ_DATA_MASK (~WORK_STRUCT_FLAG_MASK)
struct list_head entry;
work_func_t func;
};
-#define WORK_DATA_INIT(autorelease) \
- ATOMIC_LONG_INIT((autorelease) << WORK_STRUCT_NOAUTOREL)
+#define WORK_DATA_INIT() ATOMIC_LONG_INIT(0)
struct delayed_work {
struct work_struct work;
@@ -44,14 +42,8 @@ struct execute_work {
};
#define __WORK_INITIALIZER(n, f) { \
- .data = WORK_DATA_INIT(0), \
- .entry = { &(n).entry, &(n).entry }, \
- .func = (f), \
- }
-
-#define __WORK_INITIALIZER_NAR(n, f) { \
- .data = WORK_DATA_INIT(1), \
- .entry = { &(n).entry, &(n).entry }, \
+ .data = WORK_DATA_INIT(), \
+ .entry = { &(n).entry, &(n).entry }, \
.func = (f), \
}
@@ -60,23 +52,12 @@ struct execute_work {
.timer = TIMER_INITIALIZER(NULL, 0, 0), \
}
-#define __DELAYED_WORK_INITIALIZER_NAR(n, f) { \
- .work = __WORK_INITIALIZER_NAR((n).work, (f)), \
- .timer = TIMER_INITIALIZER(NULL, 0, 0), \
- }
-
#define DECLARE_WORK(n, f) \
struct work_struct n = __WORK_INITIALIZER(n, f)
-#define DECLARE_WORK_NAR(n, f) \
- struct work_struct n = __WORK_INITIALIZER_NAR(n, f)
-
#define DECLARE_DELAYED_WORK(n, f) \
struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f)
-#define DECLARE_DELAYED_WORK_NAR(n, f) \
- struct dwork_struct n = __DELAYED_WORK_INITIALIZER_NAR(n, f)
-
/*
* initialize a work item's function pointer
*/
@@ -95,16 +76,9 @@ struct execute_work {
* assignment of the work data initializer allows the compiler
* to generate better code.
*/
-#define INIT_WORK(_work, _func) \
- do { \
- (_work)->data = (atomic_long_t) WORK_DATA_INIT(0); \
- INIT_LIST_HEAD(&(_work)->entry); \
- PREPARE_WORK((_work), (_func)); \
- } while (0)
-
-#define INIT_WORK_NAR(_work, _func) \
+#define INIT_WORK(_work, _func) \
do { \
- (_work)->data = (atomic_long_t) WORK_DATA_INIT(1); \
+ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \
INIT_LIST_HEAD(&(_work)->entry); \
PREPARE_WORK((_work), (_func)); \
} while (0)
@@ -115,12 +89,6 @@ struct execute_work {
init_timer(&(_work)->timer); \
} while (0)
-#define INIT_DELAYED_WORK_NAR(_work, _func) \
- do { \
- INIT_WORK_NAR(&(_work)->work, (_func)); \
- init_timer(&(_work)->timer); \
- } while (0)
-
#define INIT_DELAYED_WORK_DEFERRABLE(_work, _func) \
do { \
INIT_WORK(&(_work)->work, (_func)); \
@@ -143,24 +111,10 @@ struct execute_work {
work_pending(&(w)->work)
/**
- * work_release - Release a work item under execution
- * @work: The work item to release
- *
- * This is used to release a work item that has been initialised with automatic
- * release mode disabled (WORK_STRUCT_NOAUTOREL is set). This gives the work
- * function the opportunity to grab auxiliary data from the container of the
- * work_struct before clearing the pending bit as the work_struct may be
- * subject to deallocation the moment the pending bit is cleared.
- *
- * In such a case, this should be called in the work function after it has
- * fetched any data it may require from the containter of the work_struct.
- * After this function has been called, the work_struct may be scheduled for
- * further execution or it may be deallocated unless other precautions are
- * taken.
- *
- * This should also be used to release a delayed work item.
+ * work_clear_pending - for internal use only, mark a work item as not pending
+ * @work: The work item in question
*/
-#define work_release(work) \
+#define work_clear_pending(work) \
clear_bit(WORK_STRUCT_PENDING, work_data_bits(work))
@@ -174,27 +128,28 @@ extern struct workqueue_struct *__create_workqueue(const char *name,
extern void destroy_workqueue(struct workqueue_struct *wq);
extern int FASTCALL(queue_work(struct workqueue_struct *wq, struct work_struct *work));
-extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq, struct delayed_work *work, unsigned long delay));
+extern int FASTCALL(queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *work, unsigned long delay));
extern int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
- struct delayed_work *work, unsigned long delay);
+ struct delayed_work *work, unsigned long delay);
+
extern void FASTCALL(flush_workqueue(struct workqueue_struct *wq));
+extern void flush_scheduled_work(void);
extern int FASTCALL(schedule_work(struct work_struct *work));
-extern int FASTCALL(run_scheduled_work(struct work_struct *work));
-extern int FASTCALL(schedule_delayed_work(struct delayed_work *work, unsigned long delay));
-
-extern int schedule_delayed_work_on(int cpu, struct delayed_work *work, unsigned long delay);
+extern int FASTCALL(schedule_delayed_work(struct delayed_work *work,
+ unsigned long delay));
+extern int schedule_delayed_work_on(int cpu, struct delayed_work *work,
+ unsigned long delay);
extern int schedule_on_each_cpu(work_func_t func);
-extern void flush_scheduled_work(void);
extern int current_is_keventd(void);
extern int keventd_up(void);
extern void init_workqueues(void);
-void cancel_rearming_delayed_work(struct delayed_work *work);
-void cancel_rearming_delayed_workqueue(struct workqueue_struct *,
- struct delayed_work *);
int execute_in_process_context(work_func_t fn, struct execute_work *);
+extern void cancel_work_sync(struct work_struct *work);
+
/*
* Kill off a pending schedule_delayed_work(). Note that the work callback
* function may still be running on return from cancel_delayed_work(), unless
@@ -207,8 +162,18 @@ static inline int cancel_delayed_work(struct delayed_work *work)
ret = del_timer(&work->timer);
if (ret)
- work_release(&work->work);
+ work_clear_pending(&work->work);
return ret;
}
+extern void cancel_rearming_delayed_work(struct delayed_work *work);
+
+/* Obsolete. use cancel_rearming_delayed_work() */
+static inline
+void cancel_rearming_delayed_workqueue(struct workqueue_struct *wq,
+ struct delayed_work *work)
+{
+ cancel_rearming_delayed_work(work);
+}
+
#endif
diff --git a/init/Kconfig b/init/Kconfig
index a7e48796d57..e63a017c391 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -502,6 +502,15 @@ config VM_EVENT_COUNTERS
on EMBEDDED systems. /proc/vmstat will only show page counts
if VM event counters are disabled.
+config SLUB_DEBUG
+ default y
+ bool "Enable SLUB debugging support" if EMBEDDED
+ help
+ SLUB has extensive debug support features. Disabling these can
+ result in significant savings in code size. This also disables
+ SLUB sysfs support. /sys/slab will not exist and there will be
+ no support for cache validation etc.
+
choice
prompt "Choose SLAB allocator"
default SLAB
@@ -512,9 +521,9 @@ config SLAB
bool "SLAB"
help
The regular slab allocator that is established and known to work
- well in all environments. It organizes chache hot objects in
+ well in all environments. It organizes cache hot objects in
per cpu and per node queues. SLAB is the default choice for
- slab allocator.
+ a slab allocator.
config SLUB
depends on EXPERIMENTAL && !ARCH_USES_SLAB_PAGE_STRUCT
@@ -524,21 +533,20 @@ config SLUB
instead of managing queues of cached objects (SLAB approach).
Per cpu caching is realized using slabs of objects instead
of queues of objects. SLUB can use memory efficiently
- way and has enhanced diagnostics.
+ and has enhanced diagnostics.
config SLOB
#
-# SLOB cannot support SMP because SLAB_DESTROY_BY_RCU does not work
-# properly.
+# SLOB does not support SMP because SLAB_DESTROY_BY_RCU is unsupported
#
depends on EMBEDDED && !SMP && !SPARSEMEM
bool "SLOB (Simple Allocator)"
help
SLOB replaces the SLAB allocator with a drastically simpler
allocator. SLOB is more space efficient that SLAB but does not
- scale well (single lock for all operations) and is more susceptible
- to fragmentation. SLOB it is a great choice to reduce
- memory usage and code size for embedded systems.
+ scale well (single lock for all operations) and is also highly
+ susceptible to fragmentation. SLUB can accomplish a higher object
+ density. It is usually better to use SLUB instead of SLOB.
endchoice
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3f57ed4599d..46fe407fb03 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -7,6 +7,7 @@
#include <linux/root_dev.h>
#include <linux/security.h>
#include <linux/delay.h>
+#include <linux/genhd.h>
#include <linux/mount.h>
#include <linux/device.h>
#include <linux/init.h>
@@ -308,17 +309,21 @@ retry:
/*
* Allow the user to distinguish between failed sys_open
* and bad superblock on root device.
+ * and give them a list of the available devices
*/
#ifdef CONFIG_BLOCK
__bdevname(ROOT_DEV, b);
#endif
printk("VFS: Cannot open root device \"%s\" or %s\n",
root_device_name, b);
- printk("Please append a correct \"root=\" boot option\n");
+ printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
+ printk_all_partitions();
panic("VFS: Unable to mount root fs on %s", b);
}
+ printk("List of all partitions:\n");
+ printk_all_partitions();
printk("No filesystem could mount root, tried: ");
for (p = fs_names; *p; p += strlen(p)+1)
printk(" %s", p);
diff --git a/init/main.c b/init/main.c
index c1537e0ddce..e8d080cab44 100644
--- a/init/main.c
+++ b/init/main.c
@@ -54,6 +54,7 @@
#include <linux/lockdep.h>
#include <linux/pid_namespace.h>
#include <linux/device.h>
+#include <linux/kthread.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -425,8 +426,12 @@ static void __init setup_command_line(char *command_line)
static void noinline rest_init(void)
__releases(kernel_lock)
{
+ int pid;
+
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);
numa_default_policy();
+ pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
+ kthreadd_task = find_task_by_pid(pid);
unlock_kernel();
/*
diff --git a/kernel/configs.c b/kernel/configs.c
index 8fa1fb28f8a..e84d3f9c6c7 100644
--- a/kernel/configs.c
+++ b/kernel/configs.c
@@ -61,18 +61,9 @@ static ssize_t
ikconfig_read_current(struct file *file, char __user *buf,
size_t len, loff_t * offset)
{
- loff_t pos = *offset;
- ssize_t count;
-
- if (pos >= kernel_config_data_size)
- return 0;
-
- count = min(len, (size_t)(kernel_config_data_size - pos));
- if (copy_to_user(buf, kernel_config_data + MAGIC_SIZE + pos, count))
- return -EFAULT;
-
- *offset += count;
- return count;
+ return simple_read_from_buffer(buf, len, offset,
+ kernel_config_data + MAGIC_SIZE,
+ kernel_config_data_size);
}
static const struct file_operations ikconfig_file_ops = {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 36e70845cfc..208cf3497c1 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -97,7 +97,7 @@ static inline void check_for_tasks(int cpu)
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
- (state = %ld, flags = %lx) \n",
+ (state = %ld, flags = %x) \n",
p->comm, p->pid, cpu, p->state, p->flags);
}
write_unlock_irq(&tasklist_lock);
@@ -120,11 +120,13 @@ static int take_cpu_down(void *unused)
}
/* Requires cpu_add_remove_lock to be held */
-static int _cpu_down(unsigned int cpu)
+static int _cpu_down(unsigned int cpu, int tasks_frozen)
{
- int err;
+ int err, nr_calls = 0;
struct task_struct *p;
cpumask_t old_allowed, tmp;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
if (num_online_cpus() == 1)
return -EBUSY;
@@ -132,12 +134,16 @@ static int _cpu_down(unsigned int cpu)
if (!cpu_online(cpu))
return -EINVAL;
- err = raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE,
- (void *)(long)cpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+ hcpu, -1, &nr_calls);
if (err == NOTIFY_BAD) {
+ __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
+ hcpu, nr_calls, NULL);
printk("%s: attempt to take down CPU %u failed\n",
__FUNCTION__, cpu);
- return -EINVAL;
+ err = -EINVAL;
+ goto out_release;
}
/* Ensure that we are not runnable on dying cpu */
@@ -152,8 +158,8 @@ static int _cpu_down(unsigned int cpu)
if (IS_ERR(p) || cpu_online(cpu)) {
/* CPU didn't die: tell everyone. Can't complain. */
- if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
- (void *)(long)cpu) == NOTIFY_BAD)
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
+ hcpu) == NOTIFY_BAD)
BUG();
if (IS_ERR(p)) {
@@ -170,13 +176,9 @@ static int _cpu_down(unsigned int cpu)
/* This actually kills the CPU. */
__cpu_die(cpu);
- /* Move it here so it can run. */
- kthread_bind(p, get_cpu());
- put_cpu();
-
/* CPU is completely dead: tell everyone. Too late to complain. */
- if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD,
- (void *)(long)cpu) == NOTIFY_BAD)
+ if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
+ hcpu) == NOTIFY_BAD)
BUG();
check_for_tasks(cpu);
@@ -185,6 +187,8 @@ out_thread:
err = kthread_stop(p);
out_allowed:
set_cpus_allowed(current, old_allowed);
+out_release:
+ raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
return err;
}
@@ -196,7 +200,7 @@ int cpu_down(unsigned int cpu)
if (cpu_hotplug_disabled)
err = -EBUSY;
else
- err = _cpu_down(cpu);
+ err = _cpu_down(cpu, 0);
mutex_unlock(&cpu_add_remove_lock);
return err;
@@ -204,15 +208,18 @@ int cpu_down(unsigned int cpu)
#endif /*CONFIG_HOTPLUG_CPU*/
/* Requires cpu_add_remove_lock to be held */
-static int __cpuinit _cpu_up(unsigned int cpu)
+static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
- int ret;
+ int ret, nr_calls = 0;
void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
if (cpu_online(cpu) || !cpu_present(cpu))
return -EINVAL;
- ret = raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE, hcpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
+ ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
+ -1, &nr_calls);
if (ret == NOTIFY_BAD) {
printk("%s: attempt to bring up CPU %u failed\n",
__FUNCTION__, cpu);
@@ -229,12 +236,13 @@ static int __cpuinit _cpu_up(unsigned int cpu)
BUG_ON(!cpu_online(cpu));
/* Now call notifier in preparation. */
- raw_notifier_call_chain(&cpu_chain, CPU_ONLINE, hcpu);
+ raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
out_notify:
if (ret != 0)
- raw_notifier_call_chain(&cpu_chain,
- CPU_UP_CANCELED, hcpu);
+ __raw_notifier_call_chain(&cpu_chain,
+ CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
+ raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
return ret;
}
@@ -247,19 +255,13 @@ int __cpuinit cpu_up(unsigned int cpu)
if (cpu_hotplug_disabled)
err = -EBUSY;
else
- err = _cpu_up(cpu);
+ err = _cpu_up(cpu, 0);
mutex_unlock(&cpu_add_remove_lock);
return err;
}
#ifdef CONFIG_SUSPEND_SMP
-/* Needed to prevent the microcode driver from requesting firmware in its CPU
- * hotplug notifier during the suspend/resume.
- */
-int suspend_cpu_hotplug;
-EXPORT_SYMBOL(suspend_cpu_hotplug);
-
static cpumask_t frozen_cpus;
int disable_nonboot_cpus(void)
@@ -267,7 +269,6 @@ int disable_nonboot_cpus(void)
int cpu, first_cpu, error = 0;
mutex_lock(&cpu_add_remove_lock);
- suspend_cpu_hotplug = 1;
first_cpu = first_cpu(cpu_online_map);
/* We take down all of the non-boot CPUs in one shot to avoid races
* with the userspace trying to use the CPU hotplug at the same time
@@ -277,7 +278,7 @@ int disable_nonboot_cpus(void)
for_each_online_cpu(cpu) {
if (cpu == first_cpu)
continue;
- error = _cpu_down(cpu);
+ error = _cpu_down(cpu, 1);
if (!error) {
cpu_set(cpu, frozen_cpus);
printk("CPU%d is down\n", cpu);
@@ -294,7 +295,6 @@ int disable_nonboot_cpus(void)
} else {
printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
- suspend_cpu_hotplug = 0;
mutex_unlock(&cpu_add_remove_lock);
return error;
}
@@ -309,10 +309,9 @@ void enable_nonboot_cpus(void)
if (cpus_empty(frozen_cpus))
goto out;
- suspend_cpu_hotplug = 1;
printk("Enabling non-boot CPUs ...\n");
for_each_cpu_mask(cpu, frozen_cpus) {
- error = _cpu_up(cpu);
+ error = _cpu_up(cpu, 1);
if (!error) {
printk("CPU%d is up\n", cpu);
continue;
@@ -320,7 +319,6 @@ void enable_nonboot_cpus(void)
printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
}
cpus_clear(frozen_cpus);
- suspend_cpu_hotplug = 0;
out:
mutex_unlock(&cpu_add_remove_lock);
}
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 88b416dfbc7..f57854b0892 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1772,12 +1772,7 @@ static ssize_t cpuset_tasks_read(struct file *file, char __user *buf,
{
struct ctr_struct *ctr = file->private_data;
- if (*ppos + nbytes > ctr->bufsz)
- nbytes = ctr->bufsz - *ppos;
- if (copy_to_user(buf, ctr->buf + *ppos, nbytes))
- return -EFAULT;
- *ppos += nbytes;
- return nbytes;
+ return simple_read_from_buffer(buf, nbytes, ppos, ctr->buf, ctr->bufsz);
}
static int cpuset_tasks_release(struct inode *unused_inode, struct file *file)
diff --git a/kernel/exit.c b/kernel/exit.c
index f5a7abb621f..b0c6f0c3a2d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -26,6 +26,7 @@
#include <linux/profile.h>
#include <linux/mount.h>
#include <linux/proc_fs.h>
+#include <linux/kthread.h>
#include <linux/mempolicy.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
@@ -254,26 +255,25 @@ static int has_stopped_jobs(struct pid *pgrp)
}
/**
- * reparent_to_init - Reparent the calling kernel thread to the init task of the pid space that the thread belongs to.
+ * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
*
* If a kernel thread is launched as a result of a system call, or if
- * it ever exits, it should generally reparent itself to init so that
- * it is correctly cleaned up on exit.
+ * it ever exits, it should generally reparent itself to kthreadd so it
+ * isn't in the way of other processes and is correctly cleaned up on exit.
*
* The various task state such as scheduling policy and priority may have
* been inherited from a user process, so we reset them to sane values here.
*
- * NOTE that reparent_to_init() gives the caller full capabilities.
+ * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
*/
-static void reparent_to_init(void)
+static void reparent_to_kthreadd(void)
{
write_lock_irq(&tasklist_lock);
ptrace_unlink(current);
/* Reparent to init */
remove_parent(current);
- current->parent = child_reaper(current);
- current->real_parent = child_reaper(current);
+ current->real_parent = current->parent = kthreadd_task;
add_parent(current);
/* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -347,7 +347,7 @@ int disallow_signal(int sig)
return -EINVAL;
spin_lock_irq(&current->sighand->siglock);
- sigaddset(&current->blocked, sig);
+ current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
recalc_sigpending();
spin_unlock_irq(&current->sighand->siglock);
return 0;
@@ -400,7 +400,7 @@ void daemonize(const char *name, ...)
current->files = init_task.files;
atomic_inc(&current->files->count);
- reparent_to_init();
+ reparent_to_kthreadd();
}
EXPORT_SYMBOL(daemonize);
diff --git a/kernel/fork.c b/kernel/fork.c
index a8dd75d4992..5dd3979747f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,7 +105,7 @@ static struct kmem_cache *mm_cachep;
void free_task(struct task_struct *tsk)
{
- free_thread_info(tsk->thread_info);
+ free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
free_task_struct(tsk);
}
@@ -175,7 +175,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
}
*tsk = *orig;
- tsk->thread_info = ti;
+ tsk->stack = ti;
setup_thread_stack(tsk, orig);
#ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/kernel/futex.c b/kernel/futex.c
index 600bc9d801f..b7ce15c67e3 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -16,6 +16,9 @@
* Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
* Copyright (C) 2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
*
+ * PRIVATE futexes by Eric Dumazet
+ * Copyright (C) 2007 Eric Dumazet <dada1@cosmosbay.com>
+ *
* Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly
* enough at me, Linus for the original (flawed) idea, Matthew
* Kirkwood for proof-of-concept implementation.
@@ -53,6 +56,12 @@
#include "rtmutex_common.h"
+#ifdef CONFIG_DEBUG_RT_MUTEXES
+# include "rtmutex-debug.h"
+#else
+# include "rtmutex.h"
+#endif
+
#define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
/*
@@ -81,12 +90,12 @@ struct futex_pi_state {
* we can wake only the relevant ones (hashed queues may be shared).
*
* A futex_q has a woken state, just like tasks have TASK_RUNNING.
- * It is considered woken when list_empty(&q->list) || q->lock_ptr == 0.
+ * It is considered woken when plist_node_empty(&q->list) || q->lock_ptr == 0.
* The order of wakup is always to make the first condition true, then
* wake up q->waiters, then make the second condition true.
*/
struct futex_q {
- struct list_head list;
+ struct plist_node list;
wait_queue_head_t waiters;
/* Which hash list lock to use: */
@@ -102,14 +111,20 @@ struct futex_q {
/* Optional priority inheritance state: */
struct futex_pi_state *pi_state;
struct task_struct *task;
+
+ /*
+ * This waiter is used in case of requeue from a
+ * normal futex to a PI-futex
+ */
+ struct rt_mutex_waiter waiter;
};
/*
* Split the global futex_lock into every hash list lock.
*/
struct futex_hash_bucket {
- spinlock_t lock;
- struct list_head chain;
+ spinlock_t lock;
+ struct plist_head chain;
};
static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
@@ -138,19 +153,26 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
&& key1->both.offset == key2->both.offset);
}
-/*
- * Get parameters which are the keys for a futex.
+/**
+ * get_futex_key - Get parameters which are the keys for a futex.
+ * @uaddr: virtual address of the futex
+ * @shared: NULL for a PROCESS_PRIVATE futex,
+ * &current->mm->mmap_sem for a PROCESS_SHARED futex
+ * @key: address where result is stored.
+ *
+ * Returns a negative error code or 0
+ * The key words are stored in *key on success.
*
* For shared mappings, it's (page->index, vma->vm_file->f_path.dentry->d_inode,
* offset_within_page). For private mappings, it's (uaddr, current->mm).
* We can usually work out the index without swapping in the page.
*
- * Returns: 0, or negative error code.
- * The key words are stored in *key on success.
- *
- * Should be called with &current->mm->mmap_sem but NOT any spinlocks.
+ * fshared is NULL for PROCESS_PRIVATE futexes
+ * For other futexes, it points to &current->mm->mmap_sem and
+ * caller must have taken the reader lock. but NOT any spinlocks.
*/
-int get_futex_key(u32 __user *uaddr, union futex_key *key)
+int get_futex_key(u32 __user *uaddr, struct rw_semaphore *fshared,
+ union futex_key *key)
{
unsigned long address = (unsigned long)uaddr;
struct mm_struct *mm = current->mm;
@@ -162,11 +184,25 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
* The futex address must be "naturally" aligned.
*/
key->both.offset = address % PAGE_SIZE;
- if (unlikely((key->both.offset % sizeof(u32)) != 0))
+ if (unlikely((address % sizeof(u32)) != 0))
return -EINVAL;
address -= key->both.offset;
/*
+ * PROCESS_PRIVATE futexes are fast.
+ * As the mm cannot disappear under us and the 'key' only needs
+ * virtual address, we dont even have to find the underlying vma.
+ * Note : We do have to check 'uaddr' is a valid user address,
+ * but access_ok() should be faster than find_vma()
+ */
+ if (!fshared) {
+ if (unlikely(!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))))
+ return -EFAULT;
+ key->private.mm = mm;
+ key->private.address = address;
+ return 0;
+ }
+ /*
* The futex is hashed differently depending on whether
* it's in a shared or private mapping. So check vma first.
*/
@@ -180,6 +216,9 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
if (unlikely((vma->vm_flags & (VM_IO|VM_READ)) != VM_READ))
return (vma->vm_flags & VM_IO) ? -EPERM : -EACCES;
+ /* Save the user address in the ley */
+ key->uaddr = uaddr;
+
/*
* Private mappings are handled in a simple way.
*
@@ -190,6 +229,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
* mappings of _writable_ handles.
*/
if (likely(!(vma->vm_flags & VM_MAYSHARE))) {
+ key->both.offset |= FUT_OFF_MMSHARED; /* reference taken on mm */
key->private.mm = mm;
key->private.address = address;
return 0;
@@ -199,7 +239,7 @@ int get_futex_key(u32 __user *uaddr, union futex_key *key)
* Linear file mappings are also simple.
*/
key->shared.inode = vma->vm_file->f_path.dentry->d_inode;
- key->both.offset++; /* Bit 0 of offset indicates inode-based key. */
+ key->both.offset |= FUT_OFF_INODE; /* inode-based key. */
if (likely(!(vma->vm_flags & VM_NONLINEAR))) {
key->shared.pgoff = (((address - vma->vm_start) >> PAGE_SHIFT)
+ vma->vm_pgoff);
@@ -227,16 +267,18 @@ EXPORT_SYMBOL_GPL(get_futex_key);
* Take a reference to the resource addressed by a key.
* Can be called while holding spinlocks.
*
- * NOTE: mmap_sem MUST be held between get_futex_key() and calling this
- * function, if it is called at all. mmap_sem keeps key->shared.inode valid.
*/
inline void get_futex_key_refs(union futex_key *key)
{
- if (key->both.ptr != 0) {
- if (key->both.offset & 1)
+ if (key->both.ptr == 0)
+ return;
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+ case FUT_OFF_INODE:
atomic_inc(&key->shared.inode->i_count);
- else
+ break;
+ case FUT_OFF_MMSHARED:
atomic_inc(&key->private.mm->mm_count);
+ break;
}
}
EXPORT_SYMBOL_GPL(get_futex_key_refs);
@@ -247,11 +289,15 @@ EXPORT_SYMBOL_GPL(get_futex_key_refs);
*/
void drop_futex_key_refs(union futex_key *key)
{
- if (key->both.ptr != 0) {
- if (key->both.offset & 1)
+ if (key->both.ptr == 0)
+ return;
+ switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
+ case FUT_OFF_INODE:
iput(key->shared.inode);
- else
+ break;
+ case FUT_OFF_MMSHARED:
mmdrop(key->private.mm);
+ break;
}
}
EXPORT_SYMBOL_GPL(drop_futex_key_refs);
@@ -268,28 +314,38 @@ static inline int get_futex_value_locked(u32 *dest, u32 __user *from)
}
/*
- * Fault handling. Called with current->mm->mmap_sem held.
+ * Fault handling.
+ * if fshared is non NULL, current->mm->mmap_sem is already held
*/
-static int futex_handle_fault(unsigned long address, int attempt)
+static int futex_handle_fault(unsigned long address,
+ struct rw_semaphore *fshared, int attempt)
{
struct vm_area_struct * vma;
struct mm_struct *mm = current->mm;
+ int ret = -EFAULT;
- if (attempt > 2 || !(vma = find_vma(mm, address)) ||
- vma->vm_start > address || !(vma->vm_flags & VM_WRITE))
- return -EFAULT;
+ if (attempt > 2)
+ return ret;
- switch (handle_mm_fault(mm, vma, address, 1)) {
- case VM_FAULT_MINOR:
- current->min_flt++;
- break;
- case VM_FAULT_MAJOR:
- current->maj_flt++;
- break;
- default:
- return -EFAULT;
+ if (!fshared)
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ if (vma && address >= vma->vm_start &&
+ (vma->vm_flags & VM_WRITE)) {
+ switch (handle_mm_fault(mm, vma, address, 1)) {
+ case VM_FAULT_MINOR:
+ ret = 0;
+ current->min_flt++;
+ break;
+ case VM_FAULT_MAJOR:
+ ret = 0;
+ current->maj_flt++;
+ break;
+ }
}
- return 0;
+ if (!fshared)
+ up_read(&mm->mmap_sem);
+ return ret;
}
/*
@@ -439,18 +495,19 @@ void exit_pi_state_list(struct task_struct *curr)
}
static int
-lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
+lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+ union futex_key *key, struct futex_pi_state **ps)
{
struct futex_pi_state *pi_state = NULL;
struct futex_q *this, *next;
- struct list_head *head;
+ struct plist_head *head;
struct task_struct *p;
pid_t pid;
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
- if (match_futex(&this->key, &me->key)) {
+ plist_for_each_entry_safe(this, next, head, list) {
+ if (match_futex(&this->key, key)) {
/*
* Another waiter already exists - bump up
* the refcount and return its pi_state:
@@ -465,7 +522,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
WARN_ON(!atomic_read(&pi_state->refcount));
atomic_inc(&pi_state->refcount);
- me->pi_state = pi_state;
+ *ps = pi_state;
return 0;
}
@@ -492,7 +549,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
/* Store the key for possible exit cleanups: */
- pi_state->key = me->key;
+ pi_state->key = *key;
spin_lock_irq(&p->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
@@ -502,7 +559,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
put_task_struct(p);
- me->pi_state = pi_state;
+ *ps = pi_state;
return 0;
}
@@ -513,12 +570,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
*/
static void wake_futex(struct futex_q *q)
{
- list_del_init(&q->list);
+ plist_del(&q->list, &q->list.plist);
if (q->filp)
send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
/*
* The lock in wake_up_all() is a crucial memory barrier after the
- * list_del_init() and also before assigning to q->lock_ptr.
+ * plist_del() and also before assigning to q->lock_ptr.
*/
wake_up_all(&q->waiters);
/*
@@ -562,6 +619,8 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
*/
if (!(uval & FUTEX_OWNER_DIED)) {
newval = FUTEX_WAITERS | new_owner->pid;
+ /* Keep the FUTEX_WAITER_REQUEUED flag if it was set */
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -629,17 +688,19 @@ double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
* Wake up all waiters hashed on the physical page that is mapped
* to this virtual address:
*/
-static int futex_wake(u32 __user *uaddr, int nr_wake)
+static int futex_wake(u32 __user *uaddr, struct rw_semaphore *fshared,
+ int nr_wake)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
- struct list_head *head;
+ struct plist_head *head;
union futex_key key;
int ret;
- down_read(&current->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr, &key);
+ ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
goto out;
@@ -647,7 +708,7 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
spin_lock(&hb->lock);
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key)) {
if (this->pi_state) {
ret = -EINVAL;
@@ -661,7 +722,261 @@ static int futex_wake(u32 __user *uaddr, int nr_wake)
spin_unlock(&hb->lock);
out:
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
+ return ret;
+}
+
+/*
+ * Called from futex_requeue_pi.
+ * Set FUTEX_WAITERS and FUTEX_WAITER_REQUEUED flags on the
+ * PI-futex value; search its associated pi_state if an owner exist
+ * or create a new one without owner.
+ */
+static inline int
+lookup_pi_state_for_requeue(u32 __user *uaddr, struct futex_hash_bucket *hb,
+ union futex_key *key,
+ struct futex_pi_state **pi_state)
+{
+ u32 curval, uval, newval;
+
+retry:
+ /*
+ * We can't handle a fault cleanly because we can't
+ * release the locks here. Simply return the fault.
+ */
+ if (get_futex_value_locked(&curval, uaddr))
+ return -EFAULT;
+
+ /* set the flags FUTEX_WAITERS and FUTEX_WAITER_REQUEUED */
+ if ((curval & (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED))
+ != (FUTEX_WAITERS | FUTEX_WAITER_REQUEUED)) {
+ /*
+ * No waiters yet, we prepare the futex to have some waiters.
+ */
+
+ uval = curval;
+ newval = uval | FUTEX_WAITERS | FUTEX_WAITER_REQUEUED;
+
+ pagefault_disable();
+ curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+ pagefault_enable();
+
+ if (unlikely(curval == -EFAULT))
+ return -EFAULT;
+ if (unlikely(curval != uval))
+ goto retry;
+ }
+
+ if (!(curval & FUTEX_TID_MASK)
+ || lookup_pi_state(curval, hb, key, pi_state)) {
+ /* the futex has no owner (yet) or the lookup failed:
+ allocate one pi_state without owner */
+
+ *pi_state = alloc_pi_state();
+
+ /* Already stores the key: */
+ (*pi_state)->key = *key;
+
+ /* init the mutex without owner */
+ __rt_mutex_init(&(*pi_state)->pi_mutex, NULL);
+ }
+
+ return 0;
+}
+
+/*
+ * Keep the first nr_wake waiter from futex1, wake up one,
+ * and requeue the next nr_requeue waiters following hashed on
+ * one physical page to another physical page (PI-futex uaddr2)
+ */
+static int futex_requeue_pi(u32 __user *uaddr1,
+ struct rw_semaphore *fshared,
+ u32 __user *uaddr2,
+ int nr_wake, int nr_requeue, u32 *cmpval)
+{
+ union futex_key key1, key2;
+ struct futex_hash_bucket *hb1, *hb2;
+ struct plist_head *head1;
+ struct futex_q *this, *next;
+ struct futex_pi_state *pi_state2 = NULL;
+ struct rt_mutex_waiter *waiter, *top_waiter = NULL;
+ struct rt_mutex *lock2 = NULL;
+ int ret, drop_count = 0;
+
+ if (refill_pi_state_cache())
+ return -ENOMEM;
+
+retry:
+ /*
+ * First take all the futex related locks:
+ */
+ if (fshared)
+ down_read(fshared);
+
+ ret = get_futex_key(uaddr1, fshared, &key1);
+ if (unlikely(ret != 0))
+ goto out;
+ ret = get_futex_key(uaddr2, fshared, &key2);
+ if (unlikely(ret != 0))
+ goto out;
+
+ hb1 = hash_futex(&key1);
+ hb2 = hash_futex(&key2);
+
+ double_lock_hb(hb1, hb2);
+
+ if (likely(cmpval != NULL)) {
+ u32 curval;
+
+ ret = get_futex_value_locked(&curval, uaddr1);
+
+ if (unlikely(ret)) {
+ spin_unlock(&hb1->lock);
+ if (hb1 != hb2)
+ spin_unlock(&hb2->lock);
+
+ /*
+ * If we would have faulted, release mmap_sem, fault
+ * it in and start all over again.
+ */
+ if (fshared)
+ up_read(fshared);
+
+ ret = get_user(curval, uaddr1);
+
+ if (!ret)
+ goto retry;
+
+ return ret;
+ }
+ if (curval != *cmpval) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+ }
+
+ head1 = &hb1->chain;
+ plist_for_each_entry_safe(this, next, head1, list) {
+ if (!match_futex (&this->key, &key1))
+ continue;
+ if (++ret <= nr_wake) {
+ wake_futex(this);
+ } else {
+ /*
+ * FIRST: get and set the pi_state
+ */
+ if (!pi_state2) {
+ int s;
+ /* do this only the first time we requeue someone */
+ s = lookup_pi_state_for_requeue(uaddr2, hb2,
+ &key2, &pi_state2);
+ if (s) {
+ ret = s;
+ goto out_unlock;
+ }
+
+ lock2 = &pi_state2->pi_mutex;
+ spin_lock(&lock2->wait_lock);
+
+ /* Save the top waiter of the wait_list */
+ if (rt_mutex_has_waiters(lock2))
+ top_waiter = rt_mutex_top_waiter(lock2);
+ } else
+ atomic_inc(&pi_state2->refcount);
+
+
+ this->pi_state = pi_state2;
+
+ /*
+ * SECOND: requeue futex_q to the correct hashbucket
+ */
+
+ /*
+ * If key1 and key2 hash to the same bucket, no need to
+ * requeue.
+ */
+ if (likely(head1 != &hb2->chain)) {
+ plist_del(&this->list, &hb1->chain);
+ plist_add(&this->list, &hb2->chain);
+ this->lock_ptr = &hb2->lock;
+#ifdef CONFIG_DEBUG_PI_LIST
+ this->list.plist.lock = &hb2->lock;
+#endif
+ }
+ this->key = key2;
+ get_futex_key_refs(&key2);
+ drop_count++;
+
+
+ /*
+ * THIRD: queue it to lock2
+ */
+ spin_lock_irq(&this->task->pi_lock);
+ waiter = &this->waiter;
+ waiter->task = this->task;
+ waiter->lock = lock2;
+ plist_node_init(&waiter->list_entry, this->task->prio);
+ plist_node_init(&waiter->pi_list_entry, this->task->prio);
+ plist_add(&waiter->list_entry, &lock2->wait_list);
+ this->task->pi_blocked_on = waiter;
+ spin_unlock_irq(&this->task->pi_lock);
+
+ if (ret - nr_wake >= nr_requeue)
+ break;
+ }
+ }
+
+ /* If we've requeued some tasks and the top_waiter of the rt_mutex
+ has changed, we must adjust the priority of the owner, if any */
+ if (drop_count) {
+ struct task_struct *owner = rt_mutex_owner(lock2);
+ if (owner &&
+ (top_waiter != (waiter = rt_mutex_top_waiter(lock2)))) {
+ int chain_walk = 0;
+
+ spin_lock_irq(&owner->pi_lock);
+ if (top_waiter)
+ plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters);
+ else
+ /*
+ * There was no waiters before the requeue,
+ * the flag must be updated
+ */
+ mark_rt_mutex_waiters(lock2);
+
+ plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
+ __rt_mutex_adjust_prio(owner);
+ if (owner->pi_blocked_on) {
+ chain_walk = 1;
+ get_task_struct(owner);
+ }
+
+ spin_unlock_irq(&owner->pi_lock);
+ spin_unlock(&lock2->wait_lock);
+
+ if (chain_walk)
+ rt_mutex_adjust_prio_chain(owner, 0, lock2, NULL,
+ current);
+ } else {
+ /* No owner or the top_waiter does not change */
+ mark_rt_mutex_waiters(lock2);
+ spin_unlock(&lock2->wait_lock);
+ }
+ }
+
+out_unlock:
+ spin_unlock(&hb1->lock);
+ if (hb1 != hb2)
+ spin_unlock(&hb2->lock);
+
+ /* drop_futex_key_refs() must be called outside the spinlocks. */
+ while (--drop_count >= 0)
+ drop_futex_key_refs(&key1);
+
+out:
+ if (fshared)
+ up_read(fshared);
return ret;
}
@@ -670,22 +985,24 @@ out:
* to this virtual address:
*/
static int
-futex_wake_op(u32 __user *uaddr1, u32 __user *uaddr2,
+futex_wake_op(u32 __user *uaddr1, struct rw_semaphore *fshared,
+ u32 __user *uaddr2,
int nr_wake, int nr_wake2, int op)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
- struct list_head *head;
+ struct plist_head *head;
struct futex_q *this, *next;
int ret, op_ret, attempt = 0;
retryfull:
- down_read(&current->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr1, &key1);
+ ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, &key2);
+ ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out;
@@ -725,11 +1042,10 @@ retry:
* still holding the mmap_sem.
*/
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr2,
- attempt)) {
- ret = -EFAULT;
+ ret = futex_handle_fault((unsigned long)uaddr2,
+ fshared, attempt);
+ if (ret)
goto out;
- }
goto retry;
}
@@ -737,7 +1053,8 @@ retry:
* If we would have faulted, release mmap_sem,
* fault it in and start all over again.
*/
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
ret = get_user(dummy, uaddr2);
if (ret)
@@ -748,7 +1065,7 @@ retry:
head = &hb1->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key1)) {
wake_futex(this);
if (++ret >= nr_wake)
@@ -760,7 +1077,7 @@ retry:
head = &hb2->chain;
op_ret = 0;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (match_futex (&this->key, &key2)) {
wake_futex(this);
if (++op_ret >= nr_wake2)
@@ -774,7 +1091,8 @@ retry:
if (hb1 != hb2)
spin_unlock(&hb2->lock);
out:
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
return ret;
}
@@ -782,22 +1100,24 @@ out:
* Requeue all waiters hashed on one physical page to another
* physical page.
*/
-static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
+static int futex_requeue(u32 __user *uaddr1, struct rw_semaphore *fshared,
+ u32 __user *uaddr2,
int nr_wake, int nr_requeue, u32 *cmpval)
{
union futex_key key1, key2;
struct futex_hash_bucket *hb1, *hb2;
- struct list_head *head1;
+ struct plist_head *head1;
struct futex_q *this, *next;
int ret, drop_count = 0;
retry:
- down_read(&current->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr1, &key1);
+ ret = get_futex_key(uaddr1, fshared, &key1);
if (unlikely(ret != 0))
goto out;
- ret = get_futex_key(uaddr2, &key2);
+ ret = get_futex_key(uaddr2, fshared, &key2);
if (unlikely(ret != 0))
goto out;
@@ -820,7 +1140,8 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
* If we would have faulted, release mmap_sem, fault
* it in and start all over again.
*/
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
ret = get_user(curval, uaddr1);
@@ -836,7 +1157,7 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
}
head1 = &hb1->chain;
- list_for_each_entry_safe(this, next, head1, list) {
+ plist_for_each_entry_safe(this, next, head1, list) {
if (!match_futex (&this->key, &key1))
continue;
if (++ret <= nr_wake) {
@@ -847,9 +1168,13 @@ static int futex_requeue(u32 __user *uaddr1, u32 __user *uaddr2,
* requeue.
*/
if (likely(head1 != &hb2->chain)) {
- list_move_tail(&this->list, &hb2->chain);
+ plist_del(&this->list, &hb1->chain);
+ plist_add(&this->list, &hb2->chain);
this->lock_ptr = &hb2->lock;
- }
+#ifdef CONFIG_DEBUG_PI_LIST
+ this->list.plist.lock = &hb2->lock;
+#endif
+ }
this->key = key2;
get_futex_key_refs(&key2);
drop_count++;
@@ -869,7 +1194,8 @@ out_unlock:
drop_futex_key_refs(&key1);
out:
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
return ret;
}
@@ -894,7 +1220,23 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
{
- list_add_tail(&q->list, &hb->chain);
+ int prio;
+
+ /*
+ * The priority used to register this element is
+ * - either the real thread-priority for the real-time threads
+ * (i.e. threads with a priority lower than MAX_RT_PRIO)
+ * - or MAX_RT_PRIO for non-RT threads.
+ * Thus, all RT-threads are woken first in priority order, and
+ * the others are woken last, in FIFO order.
+ */
+ prio = min(current->normal_prio, MAX_RT_PRIO);
+
+ plist_node_init(&q->list, prio);
+#ifdef CONFIG_DEBUG_PI_LIST
+ q->list.plist.lock = &hb->lock;
+#endif
+ plist_add(&q->list, &hb->chain);
q->task = current;
spin_unlock(&hb->lock);
}
@@ -949,8 +1291,8 @@ static int unqueue_me(struct futex_q *q)
spin_unlock(lock_ptr);
goto retry;
}
- WARN_ON(list_empty(&q->list));
- list_del(&q->list);
+ WARN_ON(plist_node_empty(&q->list));
+ plist_del(&q->list, &q->list.plist);
BUG_ON(q->pi_state);
@@ -964,39 +1306,104 @@ static int unqueue_me(struct futex_q *q)
/*
* PI futexes can not be requeued and must remove themself from the
- * hash bucket. The hash bucket lock is held on entry and dropped here.
+ * hash bucket. The hash bucket lock (i.e. lock_ptr) is held on entry
+ * and dropped here.
*/
-static void unqueue_me_pi(struct futex_q *q, struct futex_hash_bucket *hb)
+static void unqueue_me_pi(struct futex_q *q)
{
- WARN_ON(list_empty(&q->list));
- list_del(&q->list);
+ WARN_ON(plist_node_empty(&q->list));
+ plist_del(&q->list, &q->list.plist);
BUG_ON(!q->pi_state);
free_pi_state(q->pi_state);
q->pi_state = NULL;
- spin_unlock(&hb->lock);
+ spin_unlock(q->lock_ptr);
drop_futex_key_refs(&q->key);
}
+/*
+ * Fixup the pi_state owner with current.
+ *
+ * The cur->mm semaphore must be held, it is released at return of this
+ * function.
+ */
+static int fixup_pi_state_owner(u32 __user *uaddr, struct rw_semaphore *fshared,
+ struct futex_q *q,
+ struct futex_hash_bucket *hb,
+ struct task_struct *curr)
+{
+ u32 newtid = curr->pid | FUTEX_WAITERS;
+ struct futex_pi_state *pi_state = q->pi_state;
+ u32 uval, curval, newval;
+ int ret;
+
+ /* Owner died? */
+ if (pi_state->owner != NULL) {
+ spin_lock_irq(&pi_state->owner->pi_lock);
+ WARN_ON(list_empty(&pi_state->list));
+ list_del_init(&pi_state->list);
+ spin_unlock_irq(&pi_state->owner->pi_lock);
+ } else
+ newtid |= FUTEX_OWNER_DIED;
+
+ pi_state->owner = curr;
+
+ spin_lock_irq(&curr->pi_lock);
+ WARN_ON(!list_empty(&pi_state->list));
+ list_add(&pi_state->list, &curr->pi_state_list);
+ spin_unlock_irq(&curr->pi_lock);
+
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(q);
+ if (fshared)
+ up_read(fshared);
+ /*
+ * We own it, so we have to replace the pending owner
+ * TID. This must be atomic as we have preserve the
+ * owner died bit here.
+ */
+ ret = get_user(uval, uaddr);
+ while (!ret) {
+ newval = (uval & FUTEX_OWNER_DIED) | newtid;
+ newval |= (uval & FUTEX_WAITER_REQUEUED);
+ curval = futex_atomic_cmpxchg_inatomic(uaddr,
+ uval, newval);
+ if (curval == -EFAULT)
+ ret = -EFAULT;
+ if (curval == uval)
+ break;
+ uval = curval;
+ }
+ return ret;
+}
+
+/*
+ * In case we must use restart_block to restart a futex_wait,
+ * we encode in the 'arg3' shared capability
+ */
+#define ARG3_SHARED 1
+
static long futex_wait_restart(struct restart_block *restart);
-static int futex_wait_abstime(u32 __user *uaddr, u32 val,
- int timed, unsigned long abs_time)
+static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+ u32 val, ktime_t *abs_time)
{
struct task_struct *curr = current;
DECLARE_WAITQUEUE(wait, curr);
struct futex_hash_bucket *hb;
struct futex_q q;
- unsigned long time_left = 0;
u32 uval;
int ret;
+ struct hrtimer_sleeper t, *to = NULL;
+ int rem = 0;
q.pi_state = NULL;
retry:
- down_read(&curr->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr, &q.key);
+ ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
goto out_release_sem;
@@ -1019,8 +1426,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
* a wakeup when *uaddr != val on entry to the syscall. This is
* rare, but normal.
*
- * We hold the mmap semaphore, so the mapping cannot have changed
- * since we looked it up in get_futex_key.
+ * for shared futexes, we hold the mmap semaphore, so the mapping
+ * cannot have changed since we looked it up in get_futex_key.
*/
ret = get_futex_value_locked(&uval, uaddr);
@@ -1031,7 +1438,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
* If we would have faulted, release mmap_sem, fault it in and
* start all over again.
*/
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
ret = get_user(uval, uaddr);
@@ -1043,6 +1451,14 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
if (uval != val)
goto out_unlock_release_sem;
+ /*
+ * This rt_mutex_waiter structure is prepared here and will
+ * be used only if this task is requeued from a normal futex to
+ * a PI-futex with futex_requeue_pi.
+ */
+ debug_rt_mutex_init_waiter(&q.waiter);
+ q.waiter.task = NULL;
+
/* Only actually queue if *uaddr contained val. */
__queue_me(&q, hb);
@@ -1050,7 +1466,8 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
/*
* There might have been scheduling since the queue_me(), as we
@@ -1065,23 +1482,33 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
__set_current_state(TASK_INTERRUPTIBLE);
add_wait_queue(&q.waiters, &wait);
/*
- * !list_empty() is safe here without any lock.
+ * !plist_node_empty() is safe here without any lock.
* q.lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
- time_left = 0;
- if (likely(!list_empty(&q.list))) {
- unsigned long rel_time;
-
- if (timed) {
- unsigned long now = jiffies;
- if (time_after(now, abs_time))
- rel_time = 0;
- else
- rel_time = abs_time - now;
- } else
- rel_time = MAX_SCHEDULE_TIMEOUT;
+ if (likely(!plist_node_empty(&q.list))) {
+ if (!abs_time)
+ schedule();
+ else {
+ to = &t;
+ hrtimer_init(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper(&t, current);
+ t.timer.expires = *abs_time;
- time_left = schedule_timeout(rel_time);
+ hrtimer_start(&t.timer, t.timer.expires, HRTIMER_MODE_ABS);
+
+ /*
+ * the timer could have already expired, in which
+ * case current would be flagged for rescheduling.
+ * Don't bother calling schedule.
+ */
+ if (likely(t.task))
+ schedule();
+
+ hrtimer_cancel(&t.timer);
+
+ /* Flag if a timeout occured */
+ rem = (t.task == NULL);
+ }
}
__set_current_state(TASK_RUNNING);
@@ -1090,17 +1517,80 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
* we are the only user of it.
*/
+ if (q.pi_state) {
+ /*
+ * We were woken but have been requeued on a PI-futex.
+ * We have to complete the lock acquisition by taking
+ * the rtmutex.
+ */
+
+ struct rt_mutex *lock = &q.pi_state->pi_mutex;
+
+ spin_lock(&lock->wait_lock);
+ if (unlikely(q.waiter.task)) {
+ remove_waiter(lock, &q.waiter);
+ }
+ spin_unlock(&lock->wait_lock);
+
+ if (rem)
+ ret = -ETIMEDOUT;
+ else
+ ret = rt_mutex_timed_lock(lock, to, 1);
+
+ if (fshared)
+ down_read(fshared);
+ spin_lock(q.lock_ptr);
+
+ /*
+ * Got the lock. We might not be the anticipated owner if we
+ * did a lock-steal - fix up the PI-state in that case.
+ */
+ if (!ret && q.pi_state->owner != curr) {
+ /*
+ * We MUST play with the futex we were requeued on,
+ * NOT the current futex.
+ * We can retrieve it from the key of the pi_state
+ */
+ uaddr = q.pi_state->key.uaddr;
+
+ /* mmap_sem and hash_bucket lock are unlocked at
+ return of this function */
+ ret = fixup_pi_state_owner(uaddr, fshared,
+ &q, hb, curr);
+ } else {
+ /*
+ * Catch the rare case, where the lock was released
+ * when we were on the way back before we locked
+ * the hash bucket.
+ */
+ if (ret && q.pi_state->owner == curr) {
+ if (rt_mutex_trylock(&q.pi_state->pi_mutex))
+ ret = 0;
+ }
+ /* Unqueue and drop the lock */
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
+ }
+
+ debug_rt_mutex_free_waiter(&q.waiter);
+
+ return ret;
+ }
+
+ debug_rt_mutex_free_waiter(&q.waiter);
+
/* If we were woken (and unqueued), we succeeded, whatever. */
if (!unqueue_me(&q))
return 0;
- if (time_left == 0)
+ if (rem)
return -ETIMEDOUT;
/*
* We expect signal_pending(current), but another thread may
* have handled it for us already.
*/
- if (time_left == MAX_SCHEDULE_TIMEOUT)
+ if (!abs_time)
return -ERESTARTSYS;
else {
struct restart_block *restart;
@@ -1108,8 +1598,10 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
restart->fn = futex_wait_restart;
restart->arg0 = (unsigned long)uaddr;
restart->arg1 = (unsigned long)val;
- restart->arg2 = (unsigned long)timed;
- restart->arg3 = abs_time;
+ restart->arg2 = (unsigned long)abs_time;
+ restart->arg3 = 0;
+ if (fshared)
+ restart->arg3 |= ARG3_SHARED;
return -ERESTART_RESTARTBLOCK;
}
@@ -1117,65 +1609,111 @@ static int futex_wait_abstime(u32 __user *uaddr, u32 val,
queue_unlock(&q, hb);
out_release_sem:
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
return ret;
}
-static int futex_wait(u32 __user *uaddr, u32 val, unsigned long rel_time)
-{
- int timed = (rel_time != MAX_SCHEDULE_TIMEOUT);
- return futex_wait_abstime(uaddr, val, timed, jiffies+rel_time);
-}
static long futex_wait_restart(struct restart_block *restart)
{
u32 __user *uaddr = (u32 __user *)restart->arg0;
u32 val = (u32)restart->arg1;
- int timed = (int)restart->arg2;
- unsigned long abs_time = restart->arg3;
+ ktime_t *abs_time = (ktime_t *)restart->arg2;
+ struct rw_semaphore *fshared = NULL;
restart->fn = do_no_restart_syscall;
- return (long)futex_wait_abstime(uaddr, val, timed, abs_time);
+ if (restart->arg3 & ARG3_SHARED)
+ fshared = &current->mm->mmap_sem;
+ return (long)futex_wait(uaddr, fshared, val, abs_time);
}
+static void set_pi_futex_owner(struct futex_hash_bucket *hb,
+ union futex_key *key, struct task_struct *p)
+{
+ struct plist_head *head;
+ struct futex_q *this, *next;
+ struct futex_pi_state *pi_state = NULL;
+ struct rt_mutex *lock;
+
+ /* Search a waiter that should already exists */
+
+ head = &hb->chain;
+
+ plist_for_each_entry_safe(this, next, head, list) {
+ if (match_futex (&this->key, key)) {
+ pi_state = this->pi_state;
+ break;
+ }
+ }
+
+ BUG_ON(!pi_state);
+
+ /* set p as pi_state's owner */
+ lock = &pi_state->pi_mutex;
+
+ spin_lock(&lock->wait_lock);
+ spin_lock_irq(&p->pi_lock);
+
+ list_add(&pi_state->list, &p->pi_state_list);
+ pi_state->owner = p;
+
+
+ /* set p as pi_mutex's owner */
+ debug_rt_mutex_proxy_lock(lock, p);
+ WARN_ON(rt_mutex_owner(lock));
+ rt_mutex_set_owner(lock, p, 0);
+ rt_mutex_deadlock_account_lock(lock, p);
+
+ plist_add(&rt_mutex_top_waiter(lock)->pi_list_entry,
+ &p->pi_waiters);
+ __rt_mutex_adjust_prio(p);
+
+ spin_unlock_irq(&p->pi_lock);
+ spin_unlock(&lock->wait_lock);
+}
+
/*
* Userspace tried a 0 -> TID atomic transition of the futex value
* and failed. The kernel side here does the whole locking operation:
* if there are waiters then it will block, it does PI, etc. (Due to
* races the kernel might see a 0 value of the futex too.)
*/
-static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
- long nsec, int trylock)
+static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+ int detect, ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to = NULL;
struct task_struct *curr = current;
struct futex_hash_bucket *hb;
u32 uval, newval, curval;
struct futex_q q;
- int ret, attempt = 0;
+ int ret, lock_held, attempt = 0;
if (refill_pi_state_cache())
return -ENOMEM;
- if (sec != MAX_SCHEDULE_TIMEOUT) {
+ if (time) {
to = &timeout;
hrtimer_init(&to->timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
- to->timer.expires = ktime_set(sec, nsec);
+ to->timer.expires = *time;
}
q.pi_state = NULL;
retry:
- down_read(&curr->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr, &q.key);
+ ret = get_futex_key(uaddr, fshared, &q.key);
if (unlikely(ret != 0))
goto out_release_sem;
hb = queue_lock(&q, -1, NULL);
retry_locked:
+ lock_held = 0;
+
/*
* To avoid races, we attempt to take the lock here again
* (by doing a 0 -> TID atomic cmpxchg), while holding all
@@ -1194,7 +1732,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
if (unlikely((curval & FUTEX_TID_MASK) == current->pid)) {
if (!detect && 0)
force_sig(SIGKILL, current);
- ret = -EDEADLK;
+ /*
+ * Normally, this check is done in user space.
+ * In case of requeue, the owner may attempt to lock this futex,
+ * even if the ownership has already been given by the previous
+ * waker.
+ * In the usual case, this is a case of deadlock, but not in case
+ * of REQUEUE_PI.
+ */
+ if (!(curval & FUTEX_WAITER_REQUEUED))
+ ret = -EDEADLK;
goto out_unlock_release_sem;
}
@@ -1206,7 +1753,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
goto out_unlock_release_sem;
uval = curval;
- newval = uval | FUTEX_WAITERS;
+ /*
+ * In case of a requeue, check if there already is an owner
+ * If not, just take the futex.
+ */
+ if ((curval & FUTEX_WAITER_REQUEUED) && !(curval & FUTEX_TID_MASK)) {
+ /* set current as futex owner */
+ newval = curval | current->pid;
+ lock_held = 1;
+ } else
+ /* Set the WAITERS flag, so the owner will know it has someone
+ to wake at next unlock */
+ newval = curval | FUTEX_WAITERS;
pagefault_disable();
curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
@@ -1217,11 +1775,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
if (unlikely(curval != uval))
goto retry_locked;
+ if (lock_held) {
+ set_pi_futex_owner(hb, &q.key, curr);
+ goto out_unlock_release_sem;
+ }
+
/*
* We dont have the lock. Look up the PI state (or create it if
* we are the first waiter):
*/
- ret = lookup_pi_state(uval, hb, &q);
+ ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state);
if (unlikely(ret)) {
/*
@@ -1263,7 +1826,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
* Now the futex is queued and we have checked the data, we
* don't want to hold mmap_sem while we sleep.
*/
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
WARN_ON(!q.pi_state);
/*
@@ -1277,52 +1841,18 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
ret = ret ? 0 : -EWOULDBLOCK;
}
- down_read(&curr->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
spin_lock(q.lock_ptr);
/*
* Got the lock. We might not be the anticipated owner if we
* did a lock-steal - fix up the PI-state in that case.
*/
- if (!ret && q.pi_state->owner != curr) {
- u32 newtid = current->pid | FUTEX_WAITERS;
-
- /* Owner died? */
- if (q.pi_state->owner != NULL) {
- spin_lock_irq(&q.pi_state->owner->pi_lock);
- WARN_ON(list_empty(&q.pi_state->list));
- list_del_init(&q.pi_state->list);
- spin_unlock_irq(&q.pi_state->owner->pi_lock);
- } else
- newtid |= FUTEX_OWNER_DIED;
-
- q.pi_state->owner = current;
-
- spin_lock_irq(&current->pi_lock);
- WARN_ON(!list_empty(&q.pi_state->list));
- list_add(&q.pi_state->list, &current->pi_state_list);
- spin_unlock_irq(&current->pi_lock);
-
- /* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
- up_read(&curr->mm->mmap_sem);
- /*
- * We own it, so we have to replace the pending owner
- * TID. This must be atomic as we have preserve the
- * owner died bit here.
- */
- ret = get_user(uval, uaddr);
- while (!ret) {
- newval = (uval & FUTEX_OWNER_DIED) | newtid;
- curval = futex_atomic_cmpxchg_inatomic(uaddr,
- uval, newval);
- if (curval == -EFAULT)
- ret = -EFAULT;
- if (curval == uval)
- break;
- uval = curval;
- }
- } else {
+ if (!ret && q.pi_state->owner != curr)
+ /* mmap_sem is unlocked at return of this function */
+ ret = fixup_pi_state_owner(uaddr, fshared, &q, hb, curr);
+ else {
/*
* Catch the rare case, where the lock was released
* when we were on the way back before we locked
@@ -1333,8 +1863,9 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
ret = 0;
}
/* Unqueue and drop the lock */
- unqueue_me_pi(&q, hb);
- up_read(&curr->mm->mmap_sem);
+ unqueue_me_pi(&q);
+ if (fshared)
+ up_read(fshared);
}
if (!detect && ret == -EDEADLK && 0)
@@ -1346,7 +1877,8 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
queue_unlock(&q, hb);
out_release_sem:
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
return ret;
uaddr_faulted:
@@ -1357,15 +1889,16 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
* still holding the mmap_sem.
*/
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
- ret = -EFAULT;
+ ret = futex_handle_fault((unsigned long)uaddr, fshared,
+ attempt);
+ if (ret)
goto out_unlock_release_sem;
- }
goto retry_locked;
}
queue_unlock(&q, hb);
- up_read(&curr->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -1379,12 +1912,12 @@ static int futex_lock_pi(u32 __user *uaddr, int detect, unsigned long sec,
* This is the in-kernel slowpath: we look up the PI state (if any),
* and do the rt-mutex unlock.
*/
-static int futex_unlock_pi(u32 __user *uaddr)
+static int futex_unlock_pi(u32 __user *uaddr, struct rw_semaphore *fshared)
{
struct futex_hash_bucket *hb;
struct futex_q *this, *next;
u32 uval;
- struct list_head *head;
+ struct plist_head *head;
union futex_key key;
int ret, attempt = 0;
@@ -1399,9 +1932,10 @@ retry:
/*
* First take all the futex related locks:
*/
- down_read(&current->mm->mmap_sem);
+ if (fshared)
+ down_read(fshared);
- ret = get_futex_key(uaddr, &key);
+ ret = get_futex_key(uaddr, fshared, &key);
if (unlikely(ret != 0))
goto out;
@@ -1435,7 +1969,7 @@ retry_locked:
*/
head = &hb->chain;
- list_for_each_entry_safe(this, next, head, list) {
+ plist_for_each_entry_safe(this, next, head, list) {
if (!match_futex (&this->key, &key))
continue;
ret = wake_futex_pi(uaddr, uval, this);
@@ -1460,7 +1994,8 @@ retry_locked:
out_unlock:
spin_unlock(&hb->lock);
out:
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
return ret;
@@ -1472,15 +2007,16 @@ pi_faulted:
* still holding the mmap_sem.
*/
if (attempt++) {
- if (futex_handle_fault((unsigned long)uaddr, attempt)) {
- ret = -EFAULT;
+ ret = futex_handle_fault((unsigned long)uaddr, fshared,
+ attempt);
+ if (ret)
goto out_unlock;
- }
goto retry_locked;
}
spin_unlock(&hb->lock);
- up_read(&current->mm->mmap_sem);
+ if (fshared)
+ up_read(fshared);
ret = get_user(uval, uaddr);
if (!ret && (uval != -EFAULT))
@@ -1509,10 +2045,10 @@ static unsigned int futex_poll(struct file *filp,
poll_wait(filp, &q->waiters, wait);
/*
- * list_empty() is safe here without any lock.
+ * plist_node_empty() is safe here without any lock.
* q->lock_ptr != 0 is not safe, because of ordering against wakeup.
*/
- if (list_empty(&q->list))
+ if (plist_node_empty(&q->list))
ret = POLLIN | POLLRDNORM;
return ret;
@@ -1532,6 +2068,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
struct futex_q *q;
struct file *filp;
int ret, err;
+ struct rw_semaphore *fshared;
static unsigned long printk_interval;
if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
@@ -1573,11 +2110,12 @@ static int futex_fd(u32 __user *uaddr, int signal)
}
q->pi_state = NULL;
- down_read(&current->mm->mmap_sem);
- err = get_futex_key(uaddr, &q->key);
+ fshared = &current->mm->mmap_sem;
+ down_read(fshared);
+ err = get_futex_key(uaddr, fshared, &q->key);
if (unlikely(err != 0)) {
- up_read(&current->mm->mmap_sem);
+ up_read(fshared);
kfree(q);
goto error;
}
@@ -1589,7 +2127,7 @@ static int futex_fd(u32 __user *uaddr, int signal)
filp->private_data = q;
queue_me(q, ret, filp);
- up_read(&current->mm->mmap_sem);
+ up_read(fshared);
/* Now we map fd to filp, so userspace can access it */
fd_install(ret, filp);
@@ -1702,6 +2240,8 @@ retry:
* userspace.
*/
mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+ /* Also keep the FUTEX_WAITER_REQUEUED flag if set */
+ mval |= (uval & FUTEX_WAITER_REQUEUED);
nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
if (nval == -EFAULT)
@@ -1716,7 +2256,7 @@ retry:
*/
if (!pi) {
if (uval & FUTEX_WAITERS)
- futex_wake(uaddr, 1);
+ futex_wake(uaddr, &curr->mm->mmap_sem, 1);
}
}
return 0;
@@ -1772,7 +2312,8 @@ void exit_robust_list(struct task_struct *curr)
return;
if (pending)
- handle_futex_death((void __user *)pending + futex_offset, curr, pip);
+ handle_futex_death((void __user *)pending + futex_offset,
+ curr, pip);
while (entry != &head->list) {
/*
@@ -1798,39 +2339,47 @@ void exit_robust_list(struct task_struct *curr)
}
}
-long do_futex(u32 __user *uaddr, int op, u32 val, unsigned long timeout,
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
u32 __user *uaddr2, u32 val2, u32 val3)
{
int ret;
+ int cmd = op & FUTEX_CMD_MASK;
+ struct rw_semaphore *fshared = NULL;
+
+ if (!(op & FUTEX_PRIVATE_FLAG))
+ fshared = &current->mm->mmap_sem;
- switch (op) {
+ switch (cmd) {
case FUTEX_WAIT:
- ret = futex_wait(uaddr, val, timeout);
+ ret = futex_wait(uaddr, fshared, val, timeout);
break;
case FUTEX_WAKE:
- ret = futex_wake(uaddr, val);
+ ret = futex_wake(uaddr, fshared, val);
break;
case FUTEX_FD:
/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
ret = futex_fd(uaddr, val);
break;
case FUTEX_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, NULL);
+ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
break;
case FUTEX_CMP_REQUEUE:
- ret = futex_requeue(uaddr, uaddr2, val, val2, &val3);
+ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3);
break;
case FUTEX_WAKE_OP:
- ret = futex_wake_op(uaddr, uaddr2, val, val2, val3);
+ ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
break;
case FUTEX_LOCK_PI:
- ret = futex_lock_pi(uaddr, val, timeout, val2, 0);
+ ret = futex_lock_pi(uaddr, fshared, val, timeout, 0);
break;
case FUTEX_UNLOCK_PI:
- ret = futex_unlock_pi(uaddr);
+ ret = futex_unlock_pi(uaddr, fshared);
break;
case FUTEX_TRYLOCK_PI:
- ret = futex_lock_pi(uaddr, 0, timeout, val2, 1);
+ ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1);
+ break;
+ case FUTEX_CMP_REQUEUE_PI:
+ ret = futex_requeue_pi(uaddr, fshared, uaddr2, val, val2, &val3);
break;
default:
ret = -ENOSYS;
@@ -1843,29 +2392,30 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
struct timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
- struct timespec t;
- unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
u32 val2 = 0;
+ int cmd = op & FUTEX_CMD_MASK;
- if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (copy_from_user(&t, utime, sizeof(t)) != 0)
+ if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI)) {
+ if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
return -EFAULT;
- if (!timespec_valid(&t))
+ if (!timespec_valid(&ts))
return -EINVAL;
- if (op == FUTEX_WAIT)
- timeout = timespec_to_jiffies(&t) + 1;
- else {
- timeout = t.tv_sec;
- val2 = t.tv_nsec;
- }
+
+ t = timespec_to_ktime(ts);
+ if (cmd == FUTEX_WAIT)
+ t = ktime_add(ktime_get(), t);
+ tp = &t;
}
/*
- * requeue parameter in 'utime' if op == FUTEX_REQUEUE.
+ * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE.
*/
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+ if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE
+ || cmd == FUTEX_CMP_REQUEUE_PI)
val2 = (u32) (unsigned long) utime;
- return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
static int futexfs_get_sb(struct file_system_type *fs_type,
@@ -1895,7 +2445,7 @@ static int __init init(void)
}
for (i = 0; i < ARRAY_SIZE(futex_queues); i++) {
- INIT_LIST_HEAD(&futex_queues[i].chain);
+ plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock);
spin_lock_init(&futex_queues[i].lock);
}
return 0;
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index 50f24eea6cd..338a9b489fb 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -141,24 +141,24 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
struct compat_timespec __user *utime, u32 __user *uaddr2,
u32 val3)
{
- struct timespec t;
- unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
+ struct timespec ts;
+ ktime_t t, *tp = NULL;
int val2 = 0;
if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
- if (get_compat_timespec(&t, utime))
+ if (get_compat_timespec(&ts, utime))
return -EFAULT;
- if (!timespec_valid(&t))
+ if (!timespec_valid(&ts))
return -EINVAL;
+
+ t = timespec_to_ktime(ts);
if (op == FUTEX_WAIT)
- timeout = timespec_to_jiffies(&t) + 1;
- else {
- timeout = t.tv_sec;
- val2 = t.tv_nsec;
- }
+ t = ktime_add(ktime_get(), t);
+ tp = &t;
}
- if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE)
+ if (op == FUTEX_REQUEUE || op == FUTEX_CMP_REQUEUE
+ || op == FUTEX_CMP_REQUEUE_PI)
val2 = (int) (unsigned long) utime;
- return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
+ return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index c9f4f044a8a..23c03f43e19 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1411,11 +1411,13 @@ static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
+ case CPU_UP_PREPARE_FROZEN:
init_hrtimers_cpu(cpu);
break;
#ifdef CONFIG_HOTPLUG_CPU
case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
clockevents_notify(CLOCK_EVT_NOTIFY_CPU_DEAD, &cpu);
migrate_hrtimers(cpu);
break;
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 32e1ab1477d..e391cbb1f56 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -22,7 +22,6 @@
* handle_bad_irq - handle spurious and unhandled irqs
* @irq: the interrupt number
* @desc: description of the interrupt
- * @regs: pointer to a register structure
*
* Handles spurious and unhandled IRQ's. It also prints a debugmessage.
*/
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 49cc4b9c1a8..4d32eb07717 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -135,7 +135,6 @@ static int ____call_usermodehelper(void *data)
/* Unblock all signals and set the session keyring. */
new_session = key_get(sub_info->ring);
- flush_signals(current);
spin_lock_irq(&current->sighand->siglock);
old_session = __install_session_keyring(current, new_session);
flush_signal_handlers(current, 1);
@@ -186,14 +185,9 @@ static int wait_for_helper(void *data)
{
struct subprocess_info *sub_info = data;
pid_t pid;
- struct k_sigaction sa;
/* Install a handler: if SIGCLD isn't handled sys_wait4 won't
* populate the status, but will return -ECHILD. */
- sa.sa.sa_handler = SIG_IGN;
- sa.sa.sa_flags = 0;
- siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
- do_sigaction(SIGCHLD, &sa, NULL);
allow_signal(SIGCHLD);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 87c50ccd1d4..df8a8e8f6ca 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1,7 +1,7 @@
/* Kernel thread helper functions.
* Copyright (C) 2004 IBM Corporation, Rusty Russell.
*
- * Creation is done via keventd, so that we get a clean environment
+ * Creation is done via kthreadd, so that we get a clean environment
* even if we're invoked from userspace (think modprobe, hotplug cpu,
* etc.).
*/
@@ -15,24 +15,22 @@
#include <linux/mutex.h>
#include <asm/semaphore.h>
-/*
- * We dont want to execute off keventd since it might
- * hold a semaphore our callers hold too:
- */
-static struct workqueue_struct *helper_wq;
+static DEFINE_SPINLOCK(kthread_create_lock);
+static LIST_HEAD(kthread_create_list);
+struct task_struct *kthreadd_task;
struct kthread_create_info
{
- /* Information passed to kthread() from keventd. */
+ /* Information passed to kthread() from kthreadd. */
int (*threadfn)(void *data);
void *data;
struct completion started;
- /* Result passed back to kthread_create() from keventd. */
+ /* Result passed back to kthread_create() from kthreadd. */
struct task_struct *result;
struct completion done;
- struct work_struct work;
+ struct list_head list;
};
struct kthread_stop_info
@@ -60,42 +58,17 @@ int kthread_should_stop(void)
}
EXPORT_SYMBOL(kthread_should_stop);
-static void kthread_exit_files(void)
-{
- struct fs_struct *fs;
- struct task_struct *tsk = current;
-
- exit_fs(tsk); /* current->fs->count--; */
- fs = init_task.fs;
- tsk->fs = fs;
- atomic_inc(&fs->count);
- exit_files(tsk);
- current->files = init_task.files;
- atomic_inc(&tsk->files->count);
-}
-
static int kthread(void *_create)
{
struct kthread_create_info *create = _create;
int (*threadfn)(void *data);
void *data;
- sigset_t blocked;
int ret = -EINTR;
- kthread_exit_files();
-
- /* Copy data: it's on keventd's stack */
+ /* Copy data: it's on kthread's stack */
threadfn = create->threadfn;
data = create->data;
- /* Block and flush all signals (in case we're not from keventd). */
- sigfillset(&blocked);
- sigprocmask(SIG_BLOCK, &blocked, NULL);
- flush_signals(current);
-
- /* By default we can run anywhere, unlike keventd. */
- set_cpus_allowed(current, CPU_MASK_ALL);
-
/* OK, tell user we're spawned, wait for stop or wakeup */
__set_current_state(TASK_INTERRUPTIBLE);
complete(&create->started);
@@ -112,11 +85,8 @@ static int kthread(void *_create)
return 0;
}
-/* We are keventd: create a thread. */
-static void keventd_create_kthread(struct work_struct *work)
+static void create_kthread(struct kthread_create_info *create)
{
- struct kthread_create_info *create =
- container_of(work, struct kthread_create_info, work);
int pid;
/* We want our own signal handler (we take no signals by default). */
@@ -162,17 +132,14 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
create.data = data;
init_completion(&create.started);
init_completion(&create.done);
- INIT_WORK(&create.work, keventd_create_kthread);
-
- /*
- * The workqueue needs to start up first:
- */
- if (!helper_wq)
- create.work.func(&create.work);
- else {
- queue_work(helper_wq, &create.work);
- wait_for_completion(&create.done);
- }
+
+ spin_lock(&kthread_create_lock);
+ list_add_tail(&create.list, &kthread_create_list);
+ wake_up_process(kthreadd_task);
+ spin_unlock(&kthread_create_lock);
+
+ wait_for_completion(&create.done);
+
if (!IS_ERR(create.result)) {
va_list args;
va_start(args, namefmt);
@@ -180,7 +147,6 @@ struct task_struct *kthread_create(int (*threadfn)(void *data),
namefmt, args);
va_end(args);
}
-
return create.result;
}
EXPORT_SYMBOL(kthread_create);
@@ -245,12 +211,47 @@ int kthread_stop(struct task_struct *k)
}
EXPORT_SYMBOL(kthread_stop);
-static __init int helper_init(void)
+
+static __init void kthreadd_setup(void)
{
- helper_wq = create_singlethread_workqueue("kthread");
- BUG_ON(!helper_wq);
+ struct task_struct *tsk = current;
- return 0;
+ set_task_comm(tsk, "kthreadd");
+
+ ignore_signals(tsk);
+
+ set_user_nice(tsk, -5);
+ set_cpus_allowed(tsk, CPU_MASK_ALL);
}
-core_initcall(helper_init);
+int kthreadd(void *unused)
+{
+ /* Setup a clean context for our children to inherit. */
+ kthreadd_setup();
+
+ current->flags |= PF_NOFREEZE;
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (list_empty(&kthread_create_list))
+ schedule();
+ __set_current_state(TASK_RUNNING);
+
+ spin_lock(&kthread_create_lock);
+ while (!list_empty(&kthread_create_list)) {
+ struct kthread_create_info *create;
+
+ create = list_entry(kthread_create_list.next,
+ struct kthread_create_info, list);
+ list_del_init(&create->list);
+ spin_unlock(&kthread_create_lock);
+
+ create_kthread(create);
+
+ spin_lock(&kthread_create_lock);
+ }
+ spin_unlock(&kthread_create_lock);
+ }
+
+ return 0;
+}
diff --git a/kernel/mutex.c b/kernel/mutex.c
index e7cbbb82765..303eab18484 100644
--- a/kernel/mutex.c
+++ b/kernel/mutex.c
@@ -133,7 +133,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
debug_mutex_lock_common(lock, &waiter);
mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_);
- debug_mutex_add_waiter(lock, &waiter, task->thread_info);
+ debug_mutex_add_waiter(lock, &waiter, task_thread_info(task));
/* add waiting tasks to the end of the waitqueue (FIFO): */
list_add_tail(&waiter.list, &lock->wait_list);
@@ -159,7 +159,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
*/
if (unlikely(state == TASK_INTERRUPTIBLE &&
signal_pending(task))) {
- mutex_remove_waiter(lock, &waiter, task->thread_info);
+ mutex_remove_waiter(lock, &waiter, task_thread_info(task));
mutex_release(&lock->dep_map, 1, _RET_IP_);
spin_unlock_mutex(&lock->wait_lock, flags);
@@ -175,8 +175,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass)
}
/* got the lock - rejoice! */
- mutex_remove_waiter(lock, &waiter, task->thread_info);
- debug_mutex_set_owner(lock, task->thread_info);
+ mutex_remove_waiter(lock, &waiter, task_thread_info(task));
+ debug_mutex_set_owner(lock, task_thread_info(task));
/* set it to 0 if there are no waiters left: */
if (likely(list_empty(&lock->wait_list)))
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 06331374d86..b5f0543ed84 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -30,30 +30,69 @@ char resume_file[256] = CONFIG_PM_STD_PARTITION;
dev_t swsusp_resume_device;
sector_t swsusp_resume_block;
+enum {
+ HIBERNATION_INVALID,
+ HIBERNATION_PLATFORM,
+ HIBERNATION_TEST,
+ HIBERNATION_TESTPROC,
+ HIBERNATION_SHUTDOWN,
+ HIBERNATION_REBOOT,
+ /* keep last */
+ __HIBERNATION_AFTER_LAST
+};
+#define HIBERNATION_MAX (__HIBERNATION_AFTER_LAST-1)
+#define HIBERNATION_FIRST (HIBERNATION_INVALID + 1)
+
+static int hibernation_mode = HIBERNATION_SHUTDOWN;
+
+struct hibernation_ops *hibernation_ops;
+
+/**
+ * hibernation_set_ops - set the global hibernate operations
+ * @ops: the hibernation operations to use in subsequent hibernation transitions
+ */
+
+void hibernation_set_ops(struct hibernation_ops *ops)
+{
+ if (ops && !(ops->prepare && ops->enter && ops->finish)) {
+ WARN_ON(1);
+ return;
+ }
+ mutex_lock(&pm_mutex);
+ hibernation_ops = ops;
+ if (ops)
+ hibernation_mode = HIBERNATION_PLATFORM;
+ else if (hibernation_mode == HIBERNATION_PLATFORM)
+ hibernation_mode = HIBERNATION_SHUTDOWN;
+
+ mutex_unlock(&pm_mutex);
+}
+
+
/**
* platform_prepare - prepare the machine for hibernation using the
* platform driver if so configured and return an error code if it fails
*/
-static inline int platform_prepare(void)
+static int platform_prepare(void)
{
- int error = 0;
+ return (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops) ?
+ hibernation_ops->prepare() : 0;
+}
- switch (pm_disk_mode) {
- case PM_DISK_TEST:
- case PM_DISK_TESTPROC:
- case PM_DISK_SHUTDOWN:
- case PM_DISK_REBOOT:
- break;
- default:
- if (pm_ops && pm_ops->prepare)
- error = pm_ops->prepare(PM_SUSPEND_DISK);
- }
- return error;
+/**
+ * platform_finish - switch the machine to the normal mode of operation
+ * using the platform driver (must be called after platform_prepare())
+ */
+
+static void platform_finish(void)
+{
+ if (hibernation_mode == HIBERNATION_PLATFORM && hibernation_ops)
+ hibernation_ops->finish();
}
/**
- * power_down - Shut machine down for hibernate.
+ * power_down - Shut the machine down for hibernation.
*
* Use the platform driver, if configured so; otherwise try
* to power off or reboot.
@@ -61,20 +100,20 @@ static inline int platform_prepare(void)
static void power_down(void)
{
- switch (pm_disk_mode) {
- case PM_DISK_TEST:
- case PM_DISK_TESTPROC:
+ switch (hibernation_mode) {
+ case HIBERNATION_TEST:
+ case HIBERNATION_TESTPROC:
break;
- case PM_DISK_SHUTDOWN:
+ case HIBERNATION_SHUTDOWN:
kernel_power_off();
break;
- case PM_DISK_REBOOT:
+ case HIBERNATION_REBOOT:
kernel_restart(NULL);
break;
- default:
- if (pm_ops && pm_ops->enter) {
+ case HIBERNATION_PLATFORM:
+ if (hibernation_ops) {
kernel_shutdown_prepare(SYSTEM_SUSPEND_DISK);
- pm_ops->enter(PM_SUSPEND_DISK);
+ hibernation_ops->enter();
break;
}
}
@@ -87,20 +126,6 @@ static void power_down(void)
while(1);
}
-static inline void platform_finish(void)
-{
- switch (pm_disk_mode) {
- case PM_DISK_TEST:
- case PM_DISK_TESTPROC:
- case PM_DISK_SHUTDOWN:
- case PM_DISK_REBOOT:
- break;
- default:
- if (pm_ops && pm_ops->finish)
- pm_ops->finish(PM_SUSPEND_DISK);
- }
-}
-
static void unprepare_processes(void)
{
thaw_processes();
@@ -120,13 +145,10 @@ static int prepare_processes(void)
}
/**
- * pm_suspend_disk - The granpappy of hibernation power management.
- *
- * If not, then call swsusp to do its thing, then figure out how
- * to power down the system.
+ * hibernate - The granpappy of the built-in hibernation management
*/
-int pm_suspend_disk(void)
+int hibernate(void)
{
int error;
@@ -143,7 +165,8 @@ int pm_suspend_disk(void)
if (error)
goto Finish;
- if (pm_disk_mode == PM_DISK_TESTPROC) {
+ mutex_lock(&pm_mutex);
+ if (hibernation_mode == HIBERNATION_TESTPROC) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
goto Thaw;
@@ -168,7 +191,7 @@ int pm_suspend_disk(void)
if (error)
goto Enable_cpus;
- if (pm_disk_mode == PM_DISK_TEST) {
+ if (hibernation_mode == HIBERNATION_TEST) {
printk("swsusp debug: Waiting for 5 seconds.\n");
mdelay(5000);
goto Enable_cpus;
@@ -205,6 +228,7 @@ int pm_suspend_disk(void)
device_resume();
resume_console();
Thaw:
+ mutex_unlock(&pm_mutex);
unprepare_processes();
Finish:
free_basic_memory_bitmaps();
@@ -220,7 +244,7 @@ int pm_suspend_disk(void)
* Called as a late_initcall (so all devices are discovered and
* initialized), we call swsusp to see if we have a saved image or not.
* If so, we quiesce devices, the restore the saved image. We will
- * return above (in pm_suspend_disk() ) if everything goes well.
+ * return above (in hibernate() ) if everything goes well.
* Otherwise, we fail gracefully and return to the normally
* scheduled program.
*
@@ -315,25 +339,26 @@ static int software_resume(void)
late_initcall(software_resume);
-static const char * const pm_disk_modes[] = {
- [PM_DISK_PLATFORM] = "platform",
- [PM_DISK_SHUTDOWN] = "shutdown",
- [PM_DISK_REBOOT] = "reboot",
- [PM_DISK_TEST] = "test",
- [PM_DISK_TESTPROC] = "testproc",
+static const char * const hibernation_modes[] = {
+ [HIBERNATION_PLATFORM] = "platform",
+ [HIBERNATION_SHUTDOWN] = "shutdown",
+ [HIBERNATION_REBOOT] = "reboot",
+ [HIBERNATION_TEST] = "test",
+ [HIBERNATION_TESTPROC] = "testproc",
};
/**
- * disk - Control suspend-to-disk mode
+ * disk - Control hibernation mode
*
* Suspend-to-disk can be handled in several ways. We have a few options
* for putting the system to sleep - using the platform driver (e.g. ACPI
- * or other pm_ops), powering off the system or rebooting the system
- * (for testing) as well as the two test modes.
+ * or other hibernation_ops), powering off the system or rebooting the
+ * system (for testing) as well as the two test modes.
*
* The system can support 'platform', and that is known a priori (and
- * encoded in pm_ops). However, the user may choose 'shutdown' or 'reboot'
- * as alternatives, as well as the test modes 'test' and 'testproc'.
+ * encoded by the presence of hibernation_ops). However, the user may
+ * choose 'shutdown' or 'reboot' as alternatives, as well as one fo the
+ * test modes, 'test' or 'testproc'.
*
* show() will display what the mode is currently set to.
* store() will accept one of
@@ -345,7 +370,7 @@ static const char * const pm_disk_modes[] = {
* 'testproc'
*
* It will only change to 'platform' if the system
- * supports it (as determined from pm_ops->pm_disk_mode).
+ * supports it (as determined by having hibernation_ops).
*/
static ssize_t disk_show(struct kset *kset, char *buf)
@@ -353,28 +378,25 @@ static ssize_t disk_show(struct kset *kset, char *buf)
int i;
char *start = buf;
- for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) {
- if (!pm_disk_modes[i])
+ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+ if (!hibernation_modes[i])
continue;
switch (i) {
- case PM_DISK_SHUTDOWN:
- case PM_DISK_REBOOT:
- case PM_DISK_TEST:
- case PM_DISK_TESTPROC:
+ case HIBERNATION_SHUTDOWN:
+ case HIBERNATION_REBOOT:
+ case HIBERNATION_TEST:
+ case HIBERNATION_TESTPROC:
break;
- default:
- if (pm_ops && pm_ops->enter &&
- (i == pm_ops->pm_disk_mode))
+ case HIBERNATION_PLATFORM:
+ if (hibernation_ops)
break;
/* not a valid mode, continue with loop */
continue;
}
- if (i == pm_disk_mode)
- buf += sprintf(buf, "[%s]", pm_disk_modes[i]);
+ if (i == hibernation_mode)
+ buf += sprintf(buf, "[%s] ", hibernation_modes[i]);
else
- buf += sprintf(buf, "%s", pm_disk_modes[i]);
- if (i+1 != PM_DISK_MAX)
- buf += sprintf(buf, " ");
+ buf += sprintf(buf, "%s ", hibernation_modes[i]);
}
buf += sprintf(buf, "\n");
return buf-start;
@@ -387,39 +409,38 @@ static ssize_t disk_store(struct kset *kset, const char *buf, size_t n)
int i;
int len;
char *p;
- suspend_disk_method_t mode = 0;
+ int mode = HIBERNATION_INVALID;
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
mutex_lock(&pm_mutex);
- for (i = PM_DISK_PLATFORM; i < PM_DISK_MAX; i++) {
- if (!strncmp(buf, pm_disk_modes[i], len)) {
+ for (i = HIBERNATION_FIRST; i <= HIBERNATION_MAX; i++) {
+ if (!strncmp(buf, hibernation_modes[i], len)) {
mode = i;
break;
}
}
- if (mode) {
+ if (mode != HIBERNATION_INVALID) {
switch (mode) {
- case PM_DISK_SHUTDOWN:
- case PM_DISK_REBOOT:
- case PM_DISK_TEST:
- case PM_DISK_TESTPROC:
- pm_disk_mode = mode;
+ case HIBERNATION_SHUTDOWN:
+ case HIBERNATION_REBOOT:
+ case HIBERNATION_TEST:
+ case HIBERNATION_TESTPROC:
+ hibernation_mode = mode;
break;
- default:
- if (pm_ops && pm_ops->enter &&
- (mode == pm_ops->pm_disk_mode))
- pm_disk_mode = mode;
+ case HIBERNATION_PLATFORM:
+ if (hibernation_ops)
+ hibernation_mode = mode;
else
error = -EINVAL;
}
- } else {
+ } else
error = -EINVAL;
- }
- pr_debug("PM: suspend-to-disk mode set to '%s'\n",
- pm_disk_modes[mode]);
+ if (!error)
+ pr_debug("PM: suspend-to-disk mode set to '%s'\n",
+ hibernation_modes[mode]);
mutex_unlock(&pm_mutex);
return error ? error : n;
}
diff --git a/kernel/power/main.c b/kernel/power/main.c
index f6dda685e7e..40d56a31245 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -30,7 +30,6 @@
DEFINE_MUTEX(pm_mutex);
struct pm_ops *pm_ops;
-suspend_disk_method_t pm_disk_mode = PM_DISK_SHUTDOWN;
/**
* pm_set_ops - Set the global power method table.
@@ -41,10 +40,6 @@ void pm_set_ops(struct pm_ops * ops)
{
mutex_lock(&pm_mutex);
pm_ops = ops;
- if (ops && ops->pm_disk_mode != PM_DISK_INVALID) {
- pm_disk_mode = ops->pm_disk_mode;
- } else
- pm_disk_mode = PM_DISK_SHUTDOWN;
mutex_unlock(&pm_mutex);
}
@@ -184,24 +179,12 @@ static void suspend_finish(suspend_state_t state)
static const char * const pm_states[PM_SUSPEND_MAX] = {
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
- [PM_SUSPEND_DISK] = "disk",
};
static inline int valid_state(suspend_state_t state)
{
- /* Suspend-to-disk does not really need low-level support.
- * It can work with shutdown/reboot if needed. If it isn't
- * configured, then it cannot be supported.
- */
- if (state == PM_SUSPEND_DISK)
-#ifdef CONFIG_SOFTWARE_SUSPEND
- return 1;
-#else
- return 0;
-#endif
-
- /* all other states need lowlevel support and need to be
- * valid to the lowlevel implementation, no valid callback
+ /* All states need lowlevel support and need to be valid
+ * to the lowlevel implementation, no valid callback
* implies that none are valid. */
if (!pm_ops || !pm_ops->valid || !pm_ops->valid(state))
return 0;
@@ -229,11 +212,6 @@ static int enter_state(suspend_state_t state)
if (!mutex_trylock(&pm_mutex))
return -EBUSY;
- if (state == PM_SUSPEND_DISK) {
- error = pm_suspend_disk();
- goto Unlock;
- }
-
pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]);
if ((error = suspend_prepare(state)))
goto Unlock;
@@ -251,7 +229,7 @@ static int enter_state(suspend_state_t state)
/**
* pm_suspend - Externally visible function for suspending system.
- * @state: Enumarted value of state to enter.
+ * @state: Enumerated value of state to enter.
*
* Determine whether or not value is within range, get state
* structure, and enter (above).
@@ -289,7 +267,13 @@ static ssize_t state_show(struct kset *kset, char *buf)
if (pm_states[i] && valid_state(i))
s += sprintf(s,"%s ", pm_states[i]);
}
- s += sprintf(s,"\n");
+#ifdef CONFIG_SOFTWARE_SUSPEND
+ s += sprintf(s, "%s\n", "disk");
+#else
+ if (s != buf)
+ /* convert the last space to a newline */
+ *(s-1) = '\n';
+#endif
return (s - buf);
}
@@ -304,6 +288,12 @@ static ssize_t state_store(struct kset *kset, const char *buf, size_t n)
p = memchr(buf, '\n', n);
len = p ? p - buf : n;
+ /* First, check if we are requested to hibernate */
+ if (!strncmp(buf, "disk", len)) {
+ error = hibernate();
+ return error ? error : n;
+ }
+
for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) {
if (*s && !strncmp(buf, *s, len))
break;
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 34b43542785..51381487103 100644
--- a/kernel/power/power.h
+++ b/