aboutsummaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/DMA-API-HOWTO.txt37
-rw-r--r--Documentation/DMA-API.txt8
-rw-r--r--Documentation/DocBook/media/Makefile2
-rw-r--r--Documentation/DocBook/media_api.tmpl4
-rw-r--r--Documentation/SubmittingPatches22
-rw-r--r--Documentation/arm/small_task_packing.txt136
-rw-r--r--Documentation/arm64/booting.txt26
-rw-r--r--Documentation/arm64/memory.txt44
-rw-r--r--Documentation/arm64/tagged-pointers.txt34
-rw-r--r--Documentation/devicetree/bindings/arm/arch_timer.txt59
-rw-r--r--Documentation/devicetree/bindings/arm/cci.txt172
-rw-r--r--Documentation/devicetree/bindings/arm/gic.txt6
-rw-r--r--Documentation/devicetree/bindings/arm/pmu.txt3
-rw-r--r--Documentation/devicetree/bindings/arm/rtsm-dcscb.txt19
-rw-r--r--Documentation/devicetree/bindings/ata/marvell.txt2
-rw-r--r--Documentation/devicetree/bindings/mailbox/mailbox.txt38
-rw-r--r--Documentation/devicetree/bindings/mfd/vexpress-spc.txt35
-rw-r--r--Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt52
-rw-r--r--Documentation/devicetree/bindings/power/power_domain.txt49
-rw-r--r--Documentation/devicetree/bindings/thermal/thermal.txt595
-rw-r--r--Documentation/filesystems/proc.txt4
-rw-r--r--Documentation/hwmon/k10temp1
-rw-r--r--Documentation/i2c/busses/i2c-i8013
-rw-r--r--Documentation/i2c/busses/i2c-piix42
-rw-r--r--Documentation/input/elantech.txt5
-rw-r--r--Documentation/ja_JP/HOWTO2
-rw-r--r--Documentation/ja_JP/stable_kernel_rules.txt6
-rw-r--r--Documentation/kernel-parameters.txt28
-rw-r--r--Documentation/lzo.txt164
-rw-r--r--Documentation/mailbox.txt122
-rw-r--r--Documentation/networking/ip-sysctl.txt12
-rw-r--r--Documentation/networking/packet_mmap.txt10
-rw-r--r--Documentation/parisc/registers8
-rw-r--r--Documentation/pinctrl.txt11
-rw-r--r--Documentation/sound/alsa/ALSA-Configuration.txt4
-rw-r--r--Documentation/stable_kernel_rules.txt3
-rw-r--r--Documentation/sysctl/kernel.txt51
-rw-r--r--Documentation/thermal/sysfs-api.txt5
-rw-r--r--Documentation/video4linux/gspca.txt1
-rw-r--r--Documentation/virtual/kvm/api.txt186
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt83
-rw-r--r--Documentation/virtual/kvm/devices/vfio.txt22
-rw-r--r--Documentation/virtual/kvm/locking.txt8
-rw-r--r--Documentation/x86/x86_64/mm.txt2
-rw-r--r--Documentation/zh_CN/HOWTO2
-rw-r--r--Documentation/zh_CN/stable_kernel_rules.txt2
46 files changed, 1990 insertions, 100 deletions
diff --git a/Documentation/DMA-API-HOWTO.txt b/Documentation/DMA-API-HOWTO.txt
index 14129f149a75..5e983031cc11 100644
--- a/Documentation/DMA-API-HOWTO.txt
+++ b/Documentation/DMA-API-HOWTO.txt
@@ -101,14 +101,23 @@ style to do this even if your device holds the default setting,
because this shows that you did think about these issues wrt. your
device.
-The query is performed via a call to dma_set_mask():
+The query is performed via a call to dma_set_mask_and_coherent():
- int dma_set_mask(struct device *dev, u64 mask);
+ int dma_set_mask_and_coherent(struct device *dev, u64 mask);
-The query for consistent allocations is performed via a call to
-dma_set_coherent_mask():
+which will query the mask for both streaming and coherent APIs together.
+If you have some special requirements, then the following two separate
+queries can be used instead:
- int dma_set_coherent_mask(struct device *dev, u64 mask);
+ The query for streaming mappings is performed via a call to
+ dma_set_mask():
+
+ int dma_set_mask(struct device *dev, u64 mask);
+
+ The query for consistent allocations is performed via a call
+ to dma_set_coherent_mask():
+
+ int dma_set_coherent_mask(struct device *dev, u64 mask);
Here, dev is a pointer to the device struct of your device, and mask
is a bit mask describing which bits of an address your device
@@ -137,7 +146,7 @@ exactly why.
The standard 32-bit addressing device would do something like this:
- if (dma_set_mask(dev, DMA_BIT_MASK(32))) {
+ if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
printk(KERN_WARNING
"mydev: No suitable DMA available.\n");
goto ignore_this_device;
@@ -171,22 +180,20 @@ the case would look like this:
int using_dac, consistent_using_dac;
- if (!dma_set_mask(dev, DMA_BIT_MASK(64))) {
+ if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64))) {
using_dac = 1;
consistent_using_dac = 1;
- dma_set_coherent_mask(dev, DMA_BIT_MASK(64));
- } else if (!dma_set_mask(dev, DMA_BIT_MASK(32))) {
+ } else if (!dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32))) {
using_dac = 0;
consistent_using_dac = 0;
- dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
} else {
printk(KERN_WARNING
"mydev: No suitable DMA available.\n");
goto ignore_this_device;
}
-dma_set_coherent_mask() will always be able to set the same or a
-smaller mask as dma_set_mask(). However for the rare case that a
+The coherent coherent mask will always be able to set the same or a
+smaller mask as the streaming mask. However for the rare case that a
device driver only uses consistent allocations, one would have to
check the return value from dma_set_coherent_mask().
@@ -199,9 +206,9 @@ address you might do something like:
goto ignore_this_device;
}
-When dma_set_mask() is successful, and returns zero, the kernel saves
-away this mask you have provided. The kernel will use this
-information later when you make DMA mappings.
+When dma_set_mask() or dma_set_mask_and_coherent() is successful, and
+returns zero, the kernel saves away this mask you have provided. The
+kernel will use this information later when you make DMA mappings.
There is a case which we are aware of at this time, which is worth
mentioning in this documentation. If your device supports multiple
diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt
index 78a6c569d204..e865279cec58 100644
--- a/Documentation/DMA-API.txt
+++ b/Documentation/DMA-API.txt
@@ -142,6 +142,14 @@ internal API for use by the platform than an external API for use by
driver writers.
int
+dma_set_mask_and_coherent(struct device *dev, u64 mask)
+
+Checks to see if the mask is possible and updates the device
+streaming and coherent DMA mask parameters if it is.
+
+Returns: 0 if successful and a negative error if not.
+
+int
dma_set_mask(struct device *dev, u64 mask)
Checks to see if the mask is possible and updates the device
diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
index f9fd615427fb..1d27f0a1abd1 100644
--- a/Documentation/DocBook/media/Makefile
+++ b/Documentation/DocBook/media/Makefile
@@ -195,7 +195,7 @@ DVB_DOCUMENTED = \
#
install_media_images = \
- $(Q)cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
+ $(Q)-cp $(OBJIMGFILES) $(MEDIA_SRC_DIR)/v4l/*.svg $(MEDIA_OBJ_DIR)/media_api
$(MEDIA_OBJ_DIR)/%: $(MEDIA_SRC_DIR)/%.b64
$(Q)base64 -d $< >$@
diff --git a/Documentation/DocBook/media_api.tmpl b/Documentation/DocBook/media_api.tmpl
index 6a8b7158697f..9c92bb879b6d 100644
--- a/Documentation/DocBook/media_api.tmpl
+++ b/Documentation/DocBook/media_api.tmpl
@@ -1,6 +1,6 @@
<?xml version="1.0"?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.1.2//EN"
- "http://www.oasis-open.org/docbook/xml/4.1.2/docbookx.dtd" [
+<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
+ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
<!ENTITY % media-entities SYSTEM "./media-entities.tmpl"> %media-entities;
<!ENTITY media-indices SYSTEM "./media-indices.tmpl">
diff --git a/Documentation/SubmittingPatches b/Documentation/SubmittingPatches
index 6e97e73d87b5..4dbba7e100a1 100644
--- a/Documentation/SubmittingPatches
+++ b/Documentation/SubmittingPatches
@@ -131,6 +131,20 @@ If you cannot condense your patch set into a smaller set of patches,
then only post say 15 or so at a time and wait for review and integration.
+If your patch fixes a bug in a specific commit, e.g. you found an issue using
+git-bisect, please use the 'Fixes:' tag with the first 12 characters of the
+SHA-1 ID, and the one line summary.
+Example:
+
+ Fixes: e21d2170f366 ("video: remove unnecessary platform_set_drvdata()")
+
+The following git-config settings can be used to add a pretty format for
+outputting the above style in the git log or git show commands
+
+ [core]
+ abbrev = 12
+ [pretty]
+ fixes = Fixes: %h (\"%s\")
4) Style check your changes.
@@ -420,7 +434,7 @@ person it names. This tag documents that potentially interested parties
have been included in the discussion
-14) Using Reported-by:, Tested-by:, Reviewed-by: and Suggested-by:
+14) Using Reported-by:, Tested-by:, Reviewed-by:, Suggested-by: and Fixes:
If this patch fixes a problem reported by somebody else, consider adding a
Reported-by: tag to credit the reporter for their contribution. Please
@@ -475,6 +489,12 @@ idea was not posted in a public forum. That said, if we diligently credit our
idea reporters, they will, hopefully, be inspired to help us again in the
future.
+A Fixes: tag indicates that the patch fixes an issue in a previous commit. It
+is used to make it easy to determine where a bug originated, which can help
+review a bug fix. This tag also assists the stable kernel team in determining
+which stable kernel versions should receive your fix. This is the preferred
+method for indicating a bug fixed by the patch. See #2 above for more details.
+
15) The canonical patch format
diff --git a/Documentation/arm/small_task_packing.txt b/Documentation/arm/small_task_packing.txt
new file mode 100644
index 000000000000..43f0a8b80234
--- /dev/null
+++ b/Documentation/arm/small_task_packing.txt
@@ -0,0 +1,136 @@
+Small Task Packing in the big.LITTLE MP Reference Patch Set
+
+What is small task packing?
+----
+Simply that the scheduler will fit as many small tasks on a single CPU
+as possible before using other CPUs. A small task is defined as one
+whose tracked load is less than 90% of a NICE_0 task. This is a change
+from the usual behavior since the scheduler will normally use an idle
+CPU for a waking task unless that task is considered cache hot.
+
+
+How is it implemented?
+----
+Since all small tasks must wake up relatively frequently, the main
+requirement for packing small tasks is to select a partly-busy CPU when
+waking rather than looking for an idle CPU. We use the tracked load of
+the CPU runqueue to determine how heavily loaded each CPU is and the
+tracked load of the task to determine if it will fit on the CPU. We
+always start with the lowest-numbered CPU in a sched domain and stop
+looking when we find a CPU with enough space for the task.
+
+Some further tweaks are necessary to suppress load balancing when the
+CPU is not fully loaded, otherwise the scheduler attempts to spread
+tasks evenly across the domain.
+
+
+How does it interact with the HMP patches?
+----
+Firstly, we only enable packing on the little domain. The intent is that
+the big domain is intended to spread tasks amongst the available CPUs
+one-task-per-CPU. The little domain however is attempting to use as
+little power as possible while servicing its tasks.
+
+Secondly, since we offload big tasks onto little CPUs in order to try
+to devote one CPU to each task, we have a threshold above which we do
+not try to pack a task and instead will select an idle CPU if possible.
+This maintains maximum forward progress for busy tasks temporarily
+demoted from big CPUs.
+
+
+Can the behaviour be tuned?
+----
+Yes, the load level of a 'full' CPU can be easily modified in the source
+and is exposed through sysfs as /sys/kernel/hmp/packing_limit to be
+changed at runtime. The presence of the packing behaviour is controlled
+by CONFIG_SCHED_HMP_LITTLE_PACKING and can be disabled at run-time
+using /sys/kernel/hmp/packing_enable.
+The definition of a small task is hard coded as 90% of NICE_0_LOAD
+and cannot be modified at run time.
+
+
+Why do I need to tune it?
+----
+The optimal configuration is likely to be different depending upon the
+design and manufacturing of your SoC.
+
+In the main, there are two system effects from enabling small task
+packing.
+
+1. CPU operating point may increase
+2. wakeup latency of tasks may be increased
+
+There are also likely to be secondary effects from loading one CPU
+rather than spreading tasks.
+
+Note that all of these system effects are dependent upon the workload
+under consideration.
+
+
+CPU Operating Point
+----
+The primary impact of loading one CPU with a number of light tasks is to
+increase the compute requirement of that CPU since it is no longer idle
+as often. Increased compute requirement causes an increase in the
+frequency of the CPU through CPUfreq.
+
+Consider this example:
+We have a system with 3 CPUs which can operate at any frequency between
+350MHz and 1GHz. The system has 6 tasks which would each produce 10%
+load at 1GHz. The scheduler has frequency-invariant load scaling
+enabled. Our DVFS governor aims for 80% utilization at the chosen
+frequency.
+
+Without task packing, these tasks will be spread out amongst all CPUs
+such that each has 2. This will produce roughly 20% system load, and
+the frequency of the package will remain at 350MHz.
+
+With task packing set to the default packing_limit, all of these tasks
+will sit on one CPU and require a package frequency of ~750MHz to reach
+80% utilization. (0.75 = 0.6 * 0.8).
+
+When a package operates on a single frequency domain, all CPUs in that
+package share frequency and voltage.
+
+Depending upon the SoC implementation there can be a significant amount
+of energy lost to leakage from idle CPUs. The decision about how
+loaded a CPU must be to be considered 'full' is therefore controllable
+through sysfs (sys/kernel/hmp/packing_limit) and directly in the code.
+
+Continuing the example, lets set packing_limit to 450 which means we
+will pack tasks until the total load of all running tasks >= 450. In
+practise, this is very similar to a 55% idle 1Ghz CPU.
+
+Now we are only able to place 4 tasks on CPU0, and two will overflow
+onto CPU1. CPU0 will have a load of 40% and CPU1 will have a load of
+20%. In order to still hit 80% utilization, CPU0 now only needs to
+operate at (0.4*0.8=0.32) 320MHz, which means that the lowest operating
+point will be selected, the same as in the non-packing case, except that
+now CPU2 is no longer needed and can be power-gated.
+
+In order to use less energy, the saving from power-gating CPU2 must be
+more than the energy spent running CPU0 for the extra cycles. This
+depends upon the SoC implementation.
+
+This is obviously a contrived example requiring all the tasks to
+be runnable at the same time, but it illustrates the point.
+
+
+Wakeup Latency
+----
+This is an unavoidable consequence of trying to pack tasks together
+rather than giving them a CPU each. If you cannot find an acceptable
+level of wakeup latency, you should turn packing off.
+
+Cyclictest is a good test application for determining the added latency
+when configuring packing.
+
+
+Why is it turned off for the VersatileExpress V2P_CA15A7 CoreTile?
+----
+Simply, this core tile only has power gating for the whole A7 package.
+When small task packing is enabled, all our low-energy use cases
+normally fit onto one A7 CPU. We therefore end up with 2 mostly-idle
+CPUs and one mostly-busy CPU. This decreases the amount of time
+available where the whole package is idle and can be turned off.
+
diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt
index 9c4d388daddc..1b0c968098aa 100644
--- a/Documentation/arm64/booting.txt
+++ b/Documentation/arm64/booting.txt
@@ -68,13 +68,23 @@ Image target is available instead.
Requirement: MANDATORY
-The decompressed kernel image contains a 32-byte header as follows:
+The decompressed kernel image contains a 64-byte header as follows:
- u32 magic = 0x14000008; /* branch to stext, little-endian */
- u32 res0 = 0; /* reserved */
+ u32 code0; /* Executable code */
+ u32 code1; /* Executable code */
u64 text_offset; /* Image load offset */
+ u64 res0 = 0; /* reserved */
u64 res1 = 0; /* reserved */
u64 res2 = 0; /* reserved */
+ u64 res3 = 0; /* reserved */
+ u64 res4 = 0; /* reserved */
+ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */
+ u32 res5 = 0; /* reserved */
+
+
+Header notes:
+
+- code0/code1 are responsible for branching to stext.
The image must be placed at the specified offset (currently 0x80000)
from the start of the system RAM and called there. The start of the
@@ -101,8 +111,14 @@ Before jumping into the kernel, the following conditions must be met:
- Caches, MMUs
The MMU must be off.
Instruction cache may be on or off.
- Data cache must be off and invalidated.
- External caches (if present) must be configured and disabled.
+ The address range corresponding to the loaded kernel image must be
+ cleaned to the PoC. In the presence of a system cache or other
+ coherent masters with caches enabled, this will typically require
+ cache maintenance by VA rather than set/way operations.
+ System caches which respect the architected cache maintenance by VA
+ operations must be configured and may be enabled.
+ System caches which do not respect architected cache maintenance by VA
+ operations (not recommended) must be configured and disabled.
- Architected timers
CNTFRQ must be programmed with the timer frequency.
diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt
index 5f583af0a6e1..d50fa618371b 100644
--- a/Documentation/arm64/memory.txt
+++ b/Documentation/arm64/memory.txt
@@ -21,7 +21,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to
TTBR0.
-AArch64 Linux memory layout:
+AArch64 Linux memory layout with 4KB pages:
Start End Size Use
-----------------------------------------------------------------------
@@ -35,17 +35,46 @@ ffffffbc00000000 ffffffbdffffffff 8GB vmemmap
ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmmemap]
-ffffffbffbc00000 ffffffbffbdfffff 2MB earlyprintk device
+ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space
-ffffffbffbe00000 ffffffbffbe0ffff 64KB PCI I/O space
+ffffffbffb000000 ffffffbffbbfffff 12MB [guard]
-ffffffbbffff0000 ffffffbcffffffff ~2MB [guard]
+ffffffbffbc00000 ffffffbffbdfffff 2MB fixed mappings
+
+ffffffbffbe00000 ffffffbffbffffff 2MB [guard]
ffffffbffc000000 ffffffbfffffffff 64MB modules
ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map
+AArch64 Linux memory layout with 64KB pages:
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000000000000000 000003ffffffffff 4TB user
+
+fffffc0000000000 fffffdfbfffeffff ~2TB vmalloc
+
+fffffdfbffff0000 fffffdfbffffffff 64KB [guard page]
+
+fffffdfc00000000 fffffdfdffffffff 8GB vmemmap
+
+fffffdfe00000000 fffffdfffbbfffff ~8GB [guard, future vmmemap]
+
+fffffdfffa000000 fffffdfffaffffff 16MB PCI I/O space
+
+fffffdfffb000000 fffffdfffbbfffff 12MB [guard]
+
+fffffdfffbc00000 fffffdfffbdfffff 2MB fixed mappings
+
+fffffdfffbe00000 fffffdfffbffffff 2MB [guard]
+
+fffffdfffc000000 fffffdffffffffff 64MB modules
+
+fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map
+
+
Translation table lookup with 4KB pages:
+--------+--------+--------+--------+--------+--------+--------+--------+
@@ -73,3 +102,10 @@ Translation table lookup with 64KB pages:
| | +--------------------------> [41:29] L2 index (only 38:29 used)
| +-------------------------------> [47:42] L1 index (not used)
+-------------------------------------------------> [63] TTBR0/1
+
+When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
+offset from the kernel VA (top 24bits of the kernel VA set to zero):
+
+Start End Size Use
+-----------------------------------------------------------------------
+0000004000000000 0000007fffffffff 256GB kernel objects mapped in HYP
diff --git a/Documentation/arm64/tagged-pointers.txt b/Documentation/arm64/tagged-pointers.txt
new file mode 100644
index 000000000000..d9995f1f51b3
--- /dev/null
+++ b/Documentation/arm64/tagged-pointers.txt
@@ -0,0 +1,34 @@
+ Tagged virtual addresses in AArch64 Linux
+ =========================================
+
+Author: Will Deacon <will.deacon@arm.com>
+Date : 12 June 2013
+
+This document briefly describes the provision of tagged virtual
+addresses in the AArch64 translation system and their potential uses
+in AArch64 Linux.
+
+The kernel configures the translation tables so that translations made
+via TTBR0 (i.e. userspace mappings) have the top byte (bits 63:56) of
+the virtual address ignored by the translation hardware. This frees up
+this byte for application use, with the following caveats:
+
+ (1) The kernel requires that all user addresses passed to EL1
+ are tagged with tag 0x00. This means that any syscall
+ parameters containing user virtual addresses *must* have
+ their top byte cleared before trapping to the kernel.
+
+ (2) Non-zero tags are not preserved when delivering signals.
+ This means that signal handlers in applications making use
+ of tags cannot rely on the tag information for user virtual
+ addresses being maintained for fields inside siginfo_t.
+ One exception to this rule is for signals raised in response
+ to watchpoint debug exceptions, where the tag information
+ will be preserved.
+
+ (3) Special care should be taken when using tagged pointers,
+ since it is likely that C compilers will not hazard two
+ virtual addresses differing only in the upper byte.
+
+The architecture prevents the use of a tagged PC, so the upper byte will
+be set to a sign-extension of bit 55 on exception return.
diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt
index 20746e5abe6f..06fc7602593a 100644
--- a/Documentation/devicetree/bindings/arm/arch_timer.txt
+++ b/Documentation/devicetree/bindings/arm/arch_timer.txt
@@ -1,10 +1,14 @@
* ARM architected timer
-ARM cores may have a per-core architected timer, which provides per-cpu timers.
+ARM cores may have a per-core architected timer, which provides per-cpu timers,
+or a memory mapped architected timer, which provides up to 8 frames with a
+physical and optional virtual timer per frame.
-The timer is attached to a GIC to deliver its per-processor interrupts.
+The per-core architected timer is attached to a GIC to deliver its
+per-processor interrupts via PPIs. The memory mapped timer is attached to a GIC
+to deliver its interrupts via SPIs.
-** Timer node properties:
+** CP15 Timer node properties:
- compatible : Should at least contain one of
"arm,armv7-timer"
@@ -26,3 +30,52 @@ Example:
<1 10 0xf08>;
clock-frequency = <100000000>;
};
+
+** Memory mapped timer node properties:
+
+- compatible : Should at least contain "arm,armv7-timer-mem".
+
+- clock-frequency : The frequency of the main counter, in Hz. Optional.
+
+- reg : The control frame base address.
+
+Note that #address-cells, #size-cells, and ranges shall be present to ensure
+the CPU can address a frame's registers.
+
+A timer node has up to 8 frame sub-nodes, each with the following properties:
+
+- frame-number: 0 to 7.
+
+- interrupts : Interrupt list for physical and virtual timers in that order.
+ The virtual timer interrupt is optional.
+
+- reg : The first and second view base addresses in that order. The second view
+ base address is optional.
+
+- status : "disabled" indicates the frame is not available for use. Optional.
+
+Example:
+
+ timer@f0000000 {
+ compatible = "arm,armv7-timer-mem";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+ reg = <0xf0000000 0x1000>;
+ clock-frequency = <50000000>;
+
+ frame@f0001000 {
+ frame-number = <0>
+ interrupts = <0 13 0x8>,
+ <0 14 0x8>;
+ reg = <0xf0001000 0x1000>,
+ <0xf0002000 0x1000>;
+ };
+
+ frame@f0003000 {
+ frame-number = <1>
+ interrupts = <0 15 0x8>;
+ reg = <0xf0003000 0x1000>;
+ status = "disabled";
+ };
+ };
diff --git a/Documentation/devicetree/bindings/arm/cci.txt b/Documentation/devicetree/bindings/arm/cci.txt
new file mode 100644
index 000000000000..92d36e2aa877
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/cci.txt
@@ -0,0 +1,172 @@
+=======================================================
+ARM CCI cache coherent interconnect binding description
+=======================================================
+
+ARM multi-cluster systems maintain intra-cluster coherency through a
+cache coherent interconnect (CCI) that is capable of monitoring bus
+transactions and manage coherency, TLB invalidations and memory barriers.
+
+It allows snooping and distributed virtual memory message broadcast across
+clusters, through memory mapped interface, with a global control register
+space and multiple sets of interface control registers, one per slave
+interface.
+
+Bindings for the CCI node follow the ePAPR standard, available from:
+
+www.power.org/documentation/epapr-version-1-1/
+
+with the addition of the bindings described in this document which are
+specific to ARM.
+
+* CCI interconnect node
+
+ Description: Describes a CCI cache coherent Interconnect component
+
+ Node name must be "cci".
+ Node's parent must be the root node /, and the address space visible
+ through the CCI interconnect is the same as the one seen from the
+ root node (ie from CPUs perspective as per DT standard).
+ Every CCI node has to define the following properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400"
+
+ - reg
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Specifies base physical
+ address of CCI control registers common to all
+ interfaces.
+
+ - ranges:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: A standard property. Follow rules in the ePAPR for
+ hierarchical bus addressing. CCI interfaces
+ addresses refer to the parent node addressing
+ scheme to declare their register bases.
+
+ CCI interconnect node can define the following child nodes:
+
+ - CCI control interface nodes
+
+ Node name must be "slave-if".
+ Parent node must be CCI interconnect node.
+
+ A CCI control interface node must contain the following
+ properties:
+
+ - compatible
+ Usage: required
+ Value type: <string>
+ Definition: must be set to
+ "arm,cci-400-ctrl-if"
+
+ - interface-type:
+ Usage: required
+ Value type: <string>
+ Definition: must be set to one of {"ace", "ace-lite"}
+ depending on the interface type the node
+ represents.
+
+ - reg:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: the base address and size of the
+ corresponding interface programming
+ registers.
+
+* CCI interconnect bus masters
+
+ Description: masters in the device tree connected to a CCI port
+ (inclusive of CPUs and their cpu nodes).
+
+ A CCI interconnect bus master node must contain the following
+ properties:
+
+ - cci-control-port:
+ Usage: required
+ Value type: <phandle>
+ Definition: a phandle containing the CCI control interface node
+ the master is connected to.
+
+Example:
+
+ cpus {
+ #size-cells = <0>;
+ #address-cells = <1>;
+
+ CPU0: cpu@0 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x0>;
+ };
+
+ CPU1: cpu@1 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a15";
+ cci-control-port = <&cci_control1>;
+ reg = <0x1>;
+ };
+
+ CPU2: cpu@100 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x100>;
+ };
+
+ CPU3: cpu@101 {
+ device_type = "cpu";
+ compatible = "arm,cortex-a7";
+ cci-control-port = <&cci_control2>;
+ reg = <0x101>;
+ };
+
+ };
+
+ dma0: dma@3000000 {
+ compatible = "arm,pl330", "arm,primecell";
+ cci-control-port = <&cci_control0>;
+ reg = <0x0 0x3000000 0x0 0x1000>;
+ interrupts = <10>;
+ #dma-cells = <1>;
+ #dma-channels = <8>;
+ #dma-requests = <32>;
+ };
+
+ cci@2c090000 {
+ compatible = "arm,cci-400";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ reg = <0x0 0x2c090000 0 0x1000>;
+ ranges = <0x0 0x0 0x2c090000 0x6000>;
+
+ cci_control0: slave-if@1000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace-lite";
+ reg = <0x1000 0x1000>;
+ };
+
+ cci_control1: slave-if@4000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x4000 0x1000>;
+ };
+
+ cci_control2: slave-if@5000 {
+ compatible = "arm,cci-400-ctrl-if";
+ interface-type = "ace";
+ reg = <0x5000 0x1000>;
+ };
+ };
+
+This CCI node corresponds to a CCI component whose control registers sits
+at address 0x000000002c090000.
+CCI slave interface @0x000000002c091000 is connected to dma controller dma0.
+CCI slave interface @0x000000002c094000 is connected to CPUs {CPU0, CPU1};
+CCI slave interface @0x000000002c095000 is connected to CPUs {CPU2, CPU3};
diff --git a/Documentation/devicetree/bindings/arm/gic.txt b/Documentation/devicetree/bindings/arm/gic.txt
index 3dfb0c0384f5..535774577238 100644
--- a/Documentation/devicetree/bindings/arm/gic.txt
+++ b/Documentation/devicetree/bindings/arm/gic.txt
@@ -49,6 +49,11 @@ Optional
regions, used when the GIC doesn't have banked registers. The offset is
cpu-offset * cpu-nr.
+- arm,routable-irqs : Total number of gic irq inputs which are not directly
+ connected from the peripherals, but are routed dynamically
+ by a crossbar/multiplexer preceding the GIC. The GIC irq
+ input line is assigned dynamically when the corresponding
+ peripheral's crossbar line is mapped.
Example:
intc: interrupt-controller@fff11000 {
@@ -56,6 +61,7 @@ Example:
#interrupt-cells = <3>;
#address-cells = <1>;
interrupt-controller;
+ arm,routable-irqs = <160>;
reg = <0xfff11000 0x1000>,
<0xfff10100 0x100>;
};
diff --git a/Documentation/devicetree/bindings/arm/pmu.txt b/Documentation/devicetree/bindings/arm/pmu.txt
index 343781b9f246..4ce82d045a6b 100644
--- a/Documentation/devicetree/bindings/arm/pmu.txt
+++ b/Documentation/devicetree/bindings/arm/pmu.txt
@@ -16,6 +16,9 @@ Required properties:
"arm,arm1176-pmu"
"arm,arm1136-pmu"
- interrupts : 1 combined interrupt or 1 per core.
+- cluster : a phandle to the cluster to which it belongs
+ If there are more than one cluster with same CPU type
+ then there should be separate PMU nodes per cluster.
Example:
diff --git a/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
new file mode 100644
index 000000000000..3b8fbf3c00c5
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/rtsm-dcscb.txt
@@ -0,0 +1,19 @@
+ARM Dual Cluster System Configuration Block
+-------------------------------------------
+
+The Dual Cluster System Configuration Block (DCSCB) provides basic
+functionality for controlling clocks, resets and configuration pins in
+the Dual Cluster System implemented by the Real-Time System Model (RTSM).
+
+Required properties:
+
+- compatible : should be "arm,rtsm,dcscb"
+
+- reg : physical base address and the size of the registers window
+
+Example:
+
+ dcscb@60000000 {
+ compatible = "arm,rtsm,dcscb";
+ reg = <0x60000000 0x1000>;
+ };
diff --git a/Documentation/devicetree/bindings/ata/marvell.txt b/Documentation/devicetree/bindings/ata/marvell.txt
index b5cdd20cde9c..1c8351604d38 100644
--- a/Documentation/devicetree/bindings/ata/marvell.txt
+++ b/Documentation/devicetree/bindings/ata/marvell.txt
@@ -1,7 +1,7 @@
* Marvell Orion SATA
Required Properties:
-- compatibility : "marvell,orion-sata"
+- compatibility : "marvell,orion-sata" or "marvell,armada-370-sata"
- reg : Address range of controller
- interrupts : Interrupt controller is using
- nr-ports : Number of SATA ports in use.
diff --git a/Documentation/devicetree/bindings/mailbox/mailbox.txt b/Documentation/devicetree/bindings/mailbox/mailbox.txt
new file mode 100644
index 000000000000..1a2cd3d266db
--- /dev/null
+++ b/Documentation/devicetree/bindings/mailbox/mailbox.txt
@@ -0,0 +1,38 @@
+* Generic Mailbox Controller and client driver bindings
+
+Generic binding to provide a way for Mailbox controller drivers to
+assign appropriate mailbox channel to client drivers.
+
+* Mailbox Controller
+
+Required property:
+- #mbox-cells: Must be at least 1. Number of cells in a mailbox
+ specifier.
+
+Example:
+ mailbox: mailbox {
+ ...
+ #mbox-cells = <1>;
+ };
+
+
+* Mailbox Client
+
+Required property:
+- mboxes: List of phandle and mailbox channel specifiers.
+
+Optional property:
+- mbox-names: List of identifier strings for each mailbox channel
+ required by the client. The use of this property
+ is discouraged in favor of using index in list of
+ 'mboxes' while requesting a mailbox. Instead the
+ platforms may define channel indices, in DT headers,
+ to something legible.
+
+Example:
+ pwr_cntrl: power {
+ ...
+ mbox-names = "pwr-ctrl", "rpc";
+ mboxes = <&mailbox 0
+ &mailbox 1>;
+ };
diff --git a/Documentation/devicetree/bindings/mfd/vexpress-spc.txt b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
new file mode 100644
index 000000000000..1d71dc2ff151
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/vexpress-spc.txt
@@ -0,0 +1,35 @@
+* ARM Versatile Express Serial Power Controller device tree bindings
+
+Latest ARM development boards implement a power management interface (serial
+power controller - SPC) that is capable of managing power/voltage and
+operating point transitions, through memory mapped registers interface.
+
+On testchips like TC2 it also provides a configuration interface that can
+be used to read/write values which cannot be read/written through simple
+memory mapped reads/writes.
+
+- spc node
+
+ - compatible:
+ Usage: required
+ Value type: <stringlist>
+ Definition: must be
+ "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc"
+ - reg:
+ Usage: required
+ Value type: <prop-encode-array>
+ Definition: A standard property that specifies the base address
+ and the size of the SPC address space
+ - interrupts:
+ Usage: required
+ Value type: <prop-encoded-array>
+ Definition: SPC interrupt configuration. A standard property
+ that follows ePAPR interrupts specifications
+
+Example:
+
+spc: spc@7fff0000 {
+ compatible = "arm,vexpress-spc,v2p-ca15_a7","arm,vexpress-spc";
+ reg = <0 0x7FFF0000 0 0x1000>;
+ interrupts = <0 95 4>;
+};
diff --git a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
index c95ea8278f87..b275be49a546 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
+++ b/Documentation/devicetree/bindings/pinctrl/pinctrl-bindings.txt
@@ -126,3 +126,55 @@ device; they may be grandchildren, for example. Whether this is legal, and
whether there is any interaction between the child and intermediate parent
nodes, is again defined entirely by the binding for the individual pin
controller device.
+
+== Using generic pinconfig options ==
+
+Generic pinconfig parameters can be used by defining a separate node containing
+the applicable parameters (and optional values), like:
+
+pcfg_pull_up: pcfg_pull_up {
+ bias-pull-up;
+ drive-strength = <20>;
+};
+
+This node should then be referenced in the appropriate pinctrl node as a phandle
+and parsed in the driver using the pinconf_generic_parse_dt_config function.
+
+Supported configuration parameters are:
+
+bias-disable - disable any pin bias
+bias-high-impedance - high impedance mode ("third-state", "floating")
+bias-bus-hold - latch weakly
+bias-pull-up - pull up the pin
+bias-pull-down - pull down the pin
+bias-pull-pin-default - use pin-default pull state
+drive-push-pull - drive actively high and low
+drive-open-drain - drive with open drain
+drive-open-source - drive with open source
+drive-strength - sink or source at most X mA
+input-enable - enable input on pin (no effect on output)
+input-disable - disable input on pin (no effect on output)
+input-schmitt-enable - enable schmitt-trigger mode
+input-schmitt-disable - disable schmitt-trigger mode
+input-debounce - debounce mode with debound time X
+low-power-enable - enable low power mode
+low-power-disable - disable low power mode
+output-low - set the pin to output mode with low level
+output-high - set the pin to output mode with high level
+slew-rate - set the slew rate
+
+Arguments for parameters:
+
+- bias-pull-up, -down and -pin-default take as optional argument 0 to disable
+ the pull, on hardware supporting it the pull strength in Ohm. bias-disable
+ will also disable any active pull.
+
+- drive-strength takes as argument the target strength in mA.
+
+- input-debounce takes the debounce time in usec as argument
+ or 0 to disable debouncing
+
+All parameters not listed here, do not take an argument.
+
+More in-depth documentation on these parameters can be found in
+<include/linux/pinctrl/pinconfig-generic.h>
diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
new file mode 100644
index 000000000000..98c16672ab5f
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -0,0 +1,49 @@
+* Generic PM domains
+
+System on chip designs are often divided into multiple PM domains that can be
+used for power gating of selected IP blocks for power saving by reduced leakage
+current.
+
+This device tree binding can be used to bind PM domain consumer devices with
+their PM domains provided by PM domain providers. A PM domain provider can be
+represented by any node in the device tree and can provide one or more PM
+domains. A consumer node can refer to the provider by a phandle and a set of
+phandle arguments (so called PM domain specifiers) of length specified by the
+#power-domain-cells property in the PM domain provider node.
+
+==PM domain providers==
+
+Required properties:
+ - #power-domain-cells : Number of cells in a PM domain specifier;
+ Typically 0 for nodes representing a single PM domain and 1 for nodes
+ providing multiple PM domains (e.g. power controllers), but can be any value
+ as specified by device tree binding documentation of particular provider.
+
+Example:
+
+ power: power-controller@12340000 {
+ compatible = "foo,power-controller";
+ reg = <0x12340000 0x1000>;
+ #power-domain-cells = <1>;
+ };
+
+The node above defines a power controller that is a PM domain provider and
+expects one cell as its phandle argument.
+
+==PM domain consumers==
+
+Required properties:
+ - power-domains : A phandle and PM domain specifier as defined by bindings of
+ the power controller specified by phandle.
+
+Example:
+
+ leaky-device@12350000 {
+ compatible = "foo,i-leak-current";
+ reg = <0x12350000 0x1000>;
+ power-domains = <&power 0>;
+ };
+
+The node above defines a typical PM domain consumer device, which is located
+inside a PM domain with index 0 of a power controller represented by a node
+with the label "power".
diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt
new file mode 100644
index 000000000000..f5db6b72a36f
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/thermal.txt
@@ -0,0 +1,595 @@
+* Thermal Framework Device Tree descriptor
+
+This file describes a generic binding to provide a way of
+defining hardware thermal structure using device tree.
+A thermal structure includes thermal zones and their components,
+such as trip points, polling intervals, sensors and cooling devices
+binding descriptors.
+
+The target of device tree thermal descriptors is to describe only
+the hardware thermal aspects. The thermal device tree bindings are
+not about how the system must control or which algorithm or policy
+must be taken in place.
+
+There are five types of nodes involved to describe thermal bindings:
+- thermal sensors: devices which may be used to take temperature
+ measurements.
+- cooling devices: devices which may be used to dissipate heat.
+- trip points: describe key temperatures at which cooling is recommended. The
+ set of points should be chosen based on hardware limits.
+- cooling maps: used to describe links between trip points and cooling devices;
+- thermal zones: used to describe thermal data within the hardware;
+
+The following is a description of each of these node types.
+
+* Thermal sensor devices
+
+Thermal sensor devices are nodes providing temperature sensing capabilities on
+thermal zones. Typical devices are I2C ADC converters and bandgaps. These are
+nodes providing temperature data to thermal zones. Thermal sensor devices may
+control one or more internal sensors.
+
+Required property:
+- #thermal-sensor-cells: Used to provide sensor device specific information
+ Type: unsigned while referring to it. Typically 0 on thermal sensor
+ Size: one cell nodes with only one sensor, and at least 1 on nodes
+ with several internal sensors, in order
+ to identify uniquely the sensor instances within
+ the IC. See thermal zone binding for more details
+ on how consumers refer to sensor devices.
+
+* Cooling device nodes
+
+Cooling devices are nodes providing control on power dissipation. There
+are essentially two ways to provide control on power dissipation. First
+is by means of regulating device performance, which is known as passive
+cooling. A typical passive cooling is a CPU that has dynamic voltage and
+frequency scaling (DVFS), and uses lower frequencies as cooling states.
+Second is by means of activating devices in order to remove
+the dissipated heat, which is known as active cooling, e.g. regulating
+fan speeds. In both cases, cooling devices shall have a way to determine
+the state of cooling in which the device is.
+
+Any cooling device has a range of cooling states (i.e. different levels
+of heat dissipation). For example a fan's cooling states correspond to
+the different fan speeds possible. Cooling states are referred to by
+single unsigned integers, where larger numbers mean greater heat
+dissipation. The precise set of cooling states associated with a device
+(as referred to be the cooling-min-state and cooling-max-state
+properties) should be defined in a particular device's binding.
+For more examples of cooling devices, refer to the example sections below.
+
+Required properties:
+- cooling-min-state: An integer indicating the smallest
+ Type: unsigned cooling state accepted. Typically 0.
+ Size: one cell
+
+- cooling-max-state: An integer indicating the largest
+ Type: unsigned cooling state accepted.
+ Size: one cell
+
+- #cooling-cells: Used to provide cooling device specific information
+ Type: unsigned while referring to it. Must be at least 2, in order
+ Size: one cell to specify minimum and maximum cooling state used
+ in the reference. The first cell is the minimum
+ cooling state requested and the second cell is
+ the maximum cooling state requested in the reference.
+ See Cooling device maps section below for more details
+ on how consumers refer to cooling devices.
+
+* Trip points
+
+The trip node is a node to describe a point in the temperature domain
+in which the system takes an action. This node describes just the point,
+not the action.
+
+Required properties:
+- temperature: An integer indicating the trip temperature level,
+ Type: signed in millicelsius.
+ Size: one cell
+
+- hysteresis: A low hysteresis value on temperature property (above).
+ Type: unsigned This is a relative value, in millicelsius.
+ Size: one cell
+
+- type: a string containing the trip type. Expected values are:
+ "active": A trip point to enable active cooling
+ "passive": A trip point to enable passive cooling
+ "hot": A trip point to notify emergency
+ "critical": Hardware not reliable.
+ Type: string
+
+* Cooling device maps
+
+The cooling device maps node is a node to describe how cooling devices
+get assigned to trip points of the zone. The cooling devices are expected
+to be loaded in the target system.
+
+Required properties:
+- cooling-device: A phandle of a cooling device with its specifier,
+ Type: phandle + referring to which cooling device is used in this
+ cooling specifier binding. In the cooling specifier, the first cell
+ is the minimum cooling state and the second cell
+ is the maximum cooling state used in this map.
+- trip: A phandle of a trip point node within the same thermal
+ Type: phandle of zone.
+ trip point node
+
+Optional property:
+- contribution: The cooling contribution to the thermal zone of the
+ Type: unsigned referred cooling device at the referred trip point.
+ Size: one cell The contribution is a ratio of the sum
+ of all cooling contributions within a thermal zone.
+
+Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle
+limit specifier means:
+(i) - minimum state allowed for minimum cooling state used in the reference.
+(ii) - maximum state allowed for maximum cooling state used in the reference.
+Refer to include/dt-bindings/thermal/thermal.h for definition of this constant.
+
+* Thermal zone nodes
+
+The thermal zone node is the node containing all the required info
+for describing a thermal zone, including its cooling device bindings. The
+thermal zone node must contain, apart from its own properties, one sub-node
+containing trip nodes and one sub-node containing all the zone cooling maps.
+
+Required properties:
+- polling-delay: The maximum number of milliseconds to wait between polls
+ Type: unsigned when checking this thermal zone.
+ Size: one cell
+
+- polling-delay-passive: The maximum number of milliseconds to wait
+ Type: unsigned between polls when performing passive cooling.
+ Size: one cell
+
+- thermal-sensors: A list of thermal sensor phandles and sensor specifier
+ Type: list of used while monitoring the thermal zone.
+ phandles + sensor
+ specifier
+
+- trips: A sub-node which is a container of only trip point nodes
+ Type: sub-node required to describe the thermal zone.
+
+- cooling-maps: A sub-node which is a container of only cooling device
+ Type: sub-node map nodes, used to describe the relation between trips
+ and cooling devices.
+
+Optional property:
+- coefficients: An array of integers (one signed cell) containing
+ Type: array coefficients to compose a linear relation between
+ Elem size: one cell the sensors listed in the thermal-sensors property.
+ Elem type: signed Coefficients defaults to 1, in case this property
+ is not specified. A simple linear polynomial is used:
+ Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+
+ The coefficients are ordered and they match with sensors
+ by means of sensor ID. Additional coefficients are
+ interpreted as constant offset.
+
+Note: The delay properties are bound to the maximum dT/dt (temperature
+derivative over time) in two situations for a thermal zone:
+(i) - when passive cooling is activated (polling-delay-passive); and
+(ii) - when the zone just needs to be monitored (polling-delay) or
+when active cooling is activated.
+
+The maximum dT/dt is highly bound to hardware power consumption and dissipation
+capability. The delays should be chosen to account for said max dT/dt,
+such that a device does not cross several trip boundaries unexpectedly
+between polls. Choosing the right polling delays shall avoid having the
+device in temperature ranges that may damage the silicon structures and
+reduce silicon lifetime.
+
+* The thermal-zones node
+
+The "thermal-zones" node is a container for all thermal zone nodes. It shall
+contain only sub-nodes describing thermal zones as in the section
+"Thermal zone nodes". The "thermal-zones" node appears under "/".
+
+* Examples
+
+Below are several examples on how to use thermal data descriptors
+using device tree bindings:
+
+(a) - CPU thermal zone
+
+The CPU thermal zone example below describes how to setup one thermal zone
+using one single sensor as temperature source and many cooling devices and
+power dissipation control sources.
+
+#include <dt-bindings/thermal/thermal.h>
+
+cpus {
+ /*
+ * Here is an example of describing a cooling device for a DVFS
+ * capable CPU. The CPU node describes its four OPPs.
+ * The cooling states possible are 0..3, and they are
+ * used as OPP indexes. The minimum cooling state is 0, which means
+ * all four OPPs can be available to the system. The maximum
+ * cooling state is 3, which means only the lowest OPPs (198MHz@0.85V)
+ * can be available in the system.
+ */
+ cpu0: cpu@0 {
+ ...
+ operating-points = <
+ /* kHz uV */
+ 970000 1200000
+ 792000 1100000
+ 396000 950000
+ 198000 850000
+ >;
+ cooling-min-state = <0>;
+ cooling-max-state = <3>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+ ...
+};
+
+&i2c1 {
+ ...
+ /*
+ * A simple fan controller which supports 10 speeds of operation
+ * (represented as 0-9).
+ */
+ fan0: fan@0x48 {
+ ...
+ cooling-min-state = <0>;
+ cooling-max-state = <9>;
+ #cooling-cells = <2>; /* min followed by max */
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>;
+
+ trips {
+ cpu-alert0: cpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "active";
+ };
+ cpu-alert1: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu-crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu-alert0>;
+ cooling-device = <&fan0 THERMAL_NO_LIMITS 4>;
+ };
+ map1 {
+ trip = <&cpu-alert1>;
+ cooling-device = <&fan0 5 THERMAL_NO_LIMITS>;
+ };
+ map2 {
+ trip = <&cpu-alert1>;
+ cooling-device =
+ <&cpu0 THERMAL_NO_LIMITS THERMAL_NO_LIMITS>;
+ };
+ };
+ };
+};
+
+In the example above, the ADC sensor (bandgap0) at address 0x0000ED00 is
+used to monitor the zone 'cpu-thermal' using its sole sensor. A fan
+device (fan0) is controlled via I2C bus 1, at address 0x48, and has ten
+different cooling states 0-9. It is used to remove the heat out of
+the thermal zone 'cpu-thermal' using its cooling states
+from its minimum to 4, when it reaches trip point 'cpu-alert0'
+at 90C, as an example of active cooling. The same cooling device is used at
+'cpu-alert1', but from 5 to its maximum state. The cpu@0 device is also
+linked to the same thermal zone, 'cpu-thermal', as a passive cooling device,
+using all its cooling states at trip point 'cpu-alert1',
+which is a trip point at 100C. On the thermal zone 'cpu-thermal', at the
+temperature of 125C, represented by the trip point 'cpu-crit', the silicon
+is not reliable anymore.
+
+(b) - IC with several internal sensors
+
+The example below describes how to deploy several thermal zones based off a
+single sensor IC, assuming it has several internal sensors. This is a common
+case on SoC designs with several internal IPs that may need different thermal
+requirements, and thus may have their own sensor to monitor or detect internal
+hotspots in their silicon.
+
+#include <dt-bindings/thermal/thermal.h>
+
+ocp {
+ ...
+ /*
+ * A simple IC with several bandgap temperature sensors.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <1>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 0>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ cpu-alert: cpu-alert {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ cpu-crit: cpu-crit {
+ temperature = <125000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ gpu-thermal: gpu-thermal {
+ polling-delay-passive = <120>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 1>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ gpu-alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu-crit: gpu-crit {
+ temperature = <105000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+
+ dsp-thermal: dsp-thermal {
+ polling-delay-passive = <50>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&bandgap0 2>;
+
+ trips {
+ /* each zone within the SoC may have its own trips */
+ dsp-alert: gpu-alert {
+ temperature = <90000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ dsp-crit: gpu-crit {
+ temperature = <135000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ /* each zone within the SoC may have its own cooling */
+ ...
+ };
+ };
+};
+
+In the example above, there is one bandgap IC which has the capability to
+monitor three sensors. The hardware has been designed so that sensors are
+placed on different places in the DIE to monitor different temperature
+hotspots: one for CPU thermal zone, one for GPU thermal zone and the
+other to monitor a DSP thermal zone.
+
+Thus, there is a need to assign each sensor provided by the bandgap IC
+to different thermal zones. This is achieved by means of using the
+#thermal-sensor-cells property and using the first cell of the sensor
+specifier as sensor ID. In the example, then, <bandgap 0> is used to
+monitor CPU thermal zone, <bandgap 1> is used to monitor GPU thermal
+zone and <bandgap 2> is used to monitor DSP thermal zone. Each zone
+may be uncorrelated, having its own dT/dt requirements, trips
+and cooling maps.
+
+
+(c) - Several sensors within one single thermal zone
+
+The example below illustrates how to use more than one sensor within
+one thermal zone.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * A simple IC with a single temperature sensor.
+ */
+ adc: sensor@0x49 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+ocp {
+ ...
+ /*
+ * A simple IC with a single bandgap temperature sensor.
+ */
+ bandgap0: bandgap@0x0000ED00 {
+ ...
+ #thermal-sensor-cells = <0>;
+ };
+};
+
+thermal-zones {
+ cpu-thermal: cpu-thermal {
+ polling-delay-passive = <250>; /* milliseconds */
+ polling-delay = <1000>; /* milliseconds */
+
+ thermal-sensors = <&bandgap0>, /* cpu */
+ <&adc>; /* pcb north */
+
+ /* hotspot = 100 * bandgap - 120 * adc + 484 */
+ coefficients = <100 -120 484>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+};
+
+In some cases, there is a need to use more than one sensor to extrapolate
+a thermal hotspot in the silicon. The above example illustrates this situation.
+For instance, it may be the case that a sensor external to CPU IP may be placed
+close to CPU hotspot and together with internal CPU sensor, it is used
+to determine the hotspot. Assuming this is the case for the above example,
+the hypothetical extrapolation rule would be:
+ hotspot = 100 * bandgap - 120 * adc + 484
+
+In other context, the same idea can be used to add fixed offset. For instance,
+consider the hotspot extrapolation rule below:
+ hotspot = 1 * adc + 6000
+
+In the above equation, the hotspot is always 6C higher than what is read
+from the ADC sensor. The binding would be then:
+ thermal-sensors = <&adc>;
+
+ /* hotspot = 1 * adc + 6000 */
+ coefficients = <1 6000>;
+
+(d) - Board thermal
+
+The board thermal example below illustrates how to setup one thermal zone
+with many sensors and many cooling devices.
+
+#include <dt-bindings/thermal/thermal.h>
+
+&i2c1 {
+ ...
+ /*
+ * An IC with several temperature sensor.
+ */
+ adc-dummy: sensor@0x50 {
+ ...
+ #thermal-sensor-cells = <1>; /* sensor internal ID */
+ };
+};
+
+thermal-zones {
+ batt-thermal {
+ polling-delay-passive = <500>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc-dummy 4>;
+
+ trips {
+ ...
+ };
+
+ cooling-maps {
+ ...
+ };
+ };
+
+ board-thermal: board-thermal {
+ polling-delay-passive = <1000>; /* milliseconds */
+ polling-delay = <2500>; /* milliseconds */
+
+ /* sensor ID */
+ thermal-sensors = <&adc-dummy 0>, /* pcb top edge */
+ <&adc-dummy 1>, /* lcd */
+ <&adc-dymmy 2>; /* back cover */
+ /*
+ * An array of coefficients describing the sensor
+ * linear relation. E.g.:
+ * z = c1*x1 + c2*x2 + c3*x3
+ */
+ coefficients = <1200 -345 890>;
+
+ trips {
+ /* Trips are based on resulting linear equation */
+ cpu-trip: cpu-trip {
+ temperature = <60000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ gpu-trip: gpu-trip {
+ temperature = <55000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ }
+ lcd-trip: lcp-trip {
+ temperature = <53000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "passive";
+ };
+ crit-trip: crit-trip {
+ temperature = <68000>; /* millicelsius */
+ hysteresis = <2000>; /* millicelsius */
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ map0 {
+ trip = <&cpu-trip>;
+ cooling-device = <&cpu0 0 2>;
+ contribution = <55>;
+ };
+ map1 {
+ trip = <&gpu-trip>;
+ cooling-device = <&gpu0 0 2>;
+ contribution = <20>;
+ };
+ map2 {
+ trip = <&lcd-trip>;
+ cooling-device = <&lcd0 5 10>;
+ contribution = <15>;
+ };
+ };
+ };
+};
+
+The above example is a mix of previous examples, a sensor IP with several internal
+sensors used to monitor different zones, one of them is composed by several sensors and
+with different cooling devices.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index fd8d0d594fc7..954eab8c7fec 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1372,8 +1372,8 @@ may allocate from based on an estimation of its current memory and swap use.
For example, if a task is using all allowed memory, its badness score will be
1000. If it is using half of its allowed memory, its score will be 500.
-There is an additional factor included in the badness score: root
-processes are given 3% extra memory over other tasks.
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
The amount of "allowed" memory depends on the context in which the oom killer
was called. If it is due to the memory assigned to the allocating task's cpuset
diff --git a/Documentation/hwmon/k10temp b/Documentation/hwmon/k10temp
index 90956b618025..4dfdc8f83633 100644
--- a/Documentation/hwmon/k10temp
+++ b/Documentation/hwmon/k10temp
@@ -12,6 +12,7 @@ Supported chips:
* AMD Family 12h processors: "Llano" (E2/A4/A6/A8-Series)
* AMD Family 14h processors: "Brazos" (C/E/G/Z-Series)
* AMD Family 15h processors: "Bulldozer" (FX-Series), "Trinity"
+* AMD Family 16h processors: "Kabini"
Prefix: 'k10temp'
Addresses scanned: PCI space
diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801
index d55b8ab2d10f..babe2ef16139 100644
--- a/Documentation/i2c/busses/i2c-i801
+++ b/Documentation/i2c/busses/i2c-i801
@@ -24,6 +24,9 @@ Supported adapters:
* Intel Lynx Point-LP (PCH)
* Intel Avoton (SOC)
* Intel Wellsburg (PCH)
+ * Intel Coleto Creek (PCH)
+ * Intel Wildcat Point-LP (PCH)
+ * Intel BayTrail (SOC)
Datasheets: Publicly available at the Intel website
On Intel Patsburg and later chipsets, both the normal host SMBus controller
diff --git a/Documentation/i2c/busses/i2c-piix4 b/Documentation/i2c/busses/i2c-piix4
index 1e6634f54c50..a370b2047cf3 100644
--- a/Documentation/i2c/busses/i2c-piix4
+++ b/Documentation/i2c/busses/i2c-piix4
@@ -13,7 +13,7 @@ Supported adapters:
* AMD SP5100 (SB700 derivative found on some server mainboards)
Datasheet: Publicly available at the AMD website
http://support.amd.com/us/Embedded_TechDocs/44413.pdf
- * AMD Hudson-2
+ * AMD Hudson-2, CZ
Datasheet: Not publicly available
* Standard Microsystems (SMSC) SLC90E66 (Victory66) southbridge
Datasheet: Publicly available at the SMSC website http://www.smsc.com
diff --git a/Documentation/input/elantech.txt b/Documentation/input/elantech.txt
index 5602eb71ad5d..e1ae127ed099 100644
--- a/Documentation/input/elantech.txt
+++ b/Documentation/input/elantech.txt
@@ -504,9 +504,12 @@ byte 5:
* reg_10
bit 7 6 5 4 3 2 1 0
- 0 0 0 0 0 0 0 A
+ 0 0 0 0 R F T A
A: 1 = enable absolute tracking
+ T: 1 = enable two finger mode auto correct
+ F: 1 = disable ABS Position Filter
+ R: 1 = enable real hardware resolution
6.2 Native absolute mode 6 byte packet format
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/Documentation/ja_JP/HOWTO b/Documentation/ja_JP/HOWTO
index 050d37fe6d40..46ed73593465 100644
--- a/Documentation/ja_JP/HOWTO
+++ b/Documentation/ja_JP/HOWTO
@@ -315,7 +315,7 @@ Andrew Morton が Linux-kernel メーリングリストにカーネルリリー
もし、2.6.x.y カーネルが存在しない場合には、番号が一番大きい 2.6.x が
最新の安定版カーネルです。
-2.6.x.y は "stable" チーム <stable@kernel.org> でメンテされており、必
+2.6.x.y は "stable" チーム <stable@vger.kernel.org> でメンテされており、必
要に応じてリリースされます。通常のリリース期間は 2週間毎ですが、差し迫っ
た問題がなければもう少し長くなることもあります。セキュリティ関連の問題
の場合はこれに対してだいたいの場合、すぐにリリースがされます。
diff --git a/Documentation/ja_JP/stable_kernel_rules.txt b/Documentation/ja_JP/stable_kernel_rules.txt
index 14265837c4ce..9dbda9b5d21e 100644
--- a/Documentation/ja_JP/stable_kernel_rules.txt
+++ b/Documentation/ja_JP/stable_kernel_rules.txt
@@ -50,16 +50,16 @@ linux-2.6.29/Documentation/stable_kernel_rules.txt
-stable ツリーにパッチを送付する手続き-
- - 上記の規則に従っているかを確認した後に、stable@kernel.org にパッチ
+ - 上記の規則に従っているかを確認した後に、stable@vger.kernel.org にパッチ
を送る。
- 送信者はパッチがキューに受け付けられた際には ACK を、却下された場合
には NAK を受け取る。この反応は開発者たちのスケジュールによって、数
日かかる場合がある。
- もし受け取られたら、パッチは他の開発者たちと関連するサブシステムの
メンテナーによるレビューのために -stable キューに追加される。
- - パッチに stable@kernel.org のアドレスが付加されているときには、それ
+ - パッチに stable@vger.kernel.org のアドレスが付加されているときには、それ
が Linus のツリーに入る時に自動的に stable チームに email される。
- - セキュリティパッチはこのエイリアス (stable@kernel.org) に送られるべ
+ - セキュリティパッチはこのエイリアス (stable@vger.kernel.org) に送られるべ
きではなく、代わりに security@kernel.org のアドレスに送られる。
レビューサイクル-
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2fe6e767b3d6..15b24a2be6b1 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1240,6 +1240,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
See comment before ip2_setup() in
drivers/char/ip2/ip2base.c.
+ irqaffinity= [SMP] Set the default irq affinity mask
+ Format:
+ <cpu number>,...,<cpu number>
+ or
+ <cpu number>-<cpu number>
+ (must be a positive range in ascending order)
+ or a mixture
+ <cpu number>,...,<cpu number>-<cpu number>
+
irqfixup [HW]
When an interrupt is not handled search all handlers
for it. Intended to get systems with badly broken
@@ -1456,6 +1465,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
* dump_id: dump IDENTIFY data.
+ * atapi_dmadir: Enable ATAPI DMADIR bridge support
+
+ * disable: Disable this device.
+
If there are multiple matching configurations changing
the same attribute, the last one is used.
@@ -3341,6 +3354,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
that this also can be controlled per-workqueue for
workqueues visible under /sys/bus/workqueue/.
+ workqueue.power_efficient
+ Per-cpu workqueues are generally preferred because
+ they show better performance thanks to cache
+ locality; unfortunately, per-cpu workqueues tend to
+ be more power hungry than unbound workqueues.
+
+ Enabling this makes the per-cpu workqueues which
+ were observed to contribute significantly to power
+ consumption unbound, leading to measurably lower
+ power usage at the cost of small performance
+ overhead.
+
+ The default value of this parameter is determined by
+ the config option CONFIG_WQ_POWER_EFFICIENT_DEFAULT.
+
x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
default x2apic cluster mode on platforms
supporting x2apic.
diff --git a/Documentation/lzo.txt b/Documentation/lzo.txt
new file mode 100644
index 000000000000..ea45dd3901e3
--- /dev/null
+++ b/Documentation/lzo.txt
@@ -0,0 +1,164 @@
+
+LZO stream format as understood by Linux's LZO decompressor
+===========================================================
+
+Introduction
+
+ This is not a specification. No specification seems to be publicly available
+ for the LZO stream format. This document describes what input format the LZO
+ decompressor as implemented in the Linux kernel understands. The file subject
+ of this analysis is lib/lzo/lzo1x_decompress_safe.c. No analysis was made on
+ the compressor nor on any other implementations though it seems likely that
+ the format matches the standard one. The purpose of this document is to
+ better understand what the code does in order to propose more efficient fixes
+ for future bug reports.
+
+Description
+
+ The stream is composed of a series of instructions, operands, and data. The
+ instructions consist in a few bits representing an opcode, and bits forming
+ the operands for the instruction, whose size and position depend on the
+ opcode and on the number of literals copied by previous instruction. The
+ operands are used to indicate :
+
+ - a distance when copying data from the dictionary (past output buffer)
+ - a length (number of bytes to copy from dictionary)
+ - the number of literals to copy, which is retained in variable "state"
+ as a piece of information for next instructions.
+
+ Optionally depending on the opcode and operands, extra data may follow. These
+ extra data can be a complement for the operand (eg: a length or a distance
+ encoded on larger values), or a literal to be copied to the output buffer.
+
+ The first byte of the block follows a different encoding from other bytes, it
+ seems to be optimized for literal use only, since there is no dictionary yet
+ prior to that byte.
+
+ Lengths are always encoded on a variable size starting with a small number
+ of bits in the operand. If the number of bits isn't enough to represent the
+ length, up to 255 may be added in increments by consuming more bytes with a
+ rate of at most 255 per extra byte (thus the compression ratio cannot exceed
+ around 255:1). The variable length encoding using #bits is always the same :
+
+ length = byte & ((1 << #bits) - 1)
+ if (!length) {
+ length = ((1 << #bits) - 1)
+ length += 255*(number of zero bytes)
+ length += first-non-zero-byte
+ }
+ length += constant (generally 2 or 3)
+
+ For references to the dictionary, distances are relative to the output
+ pointer. Distances are encoded using very few bits belonging to certain
+ ranges, resulting in multiple copy instructions using different encodings.
+ Certain encodings involve one extra byte, others involve two extra bytes
+ forming a little-endian 16-bit quantity (marked LE16 below).
+
+ After any instruction except the large literal copy, 0, 1, 2 or 3 literals
+ are copied before starting the next instruction. The number of literals that
+ were copied may change the meaning and behaviour of the next instruction. In
+ practice, only one instruction needs to know whether 0, less than 4, or more
+ literals were copied. This is the information stored in the <state> variable
+ in this implementation. This number of immediate literals to be copied is
+ generally encoded in the last two bits of the instruction but may also be
+ taken from the last two bits of an extra operand (eg: distance).
+
+ End of stream is declared when a block copy of distance 0 is seen. Only one
+ instruction may encode this distance (0001HLLL), it takes one LE16 operand
+ for the distance, thus requiring 3 bytes.
+
+ IMPORTANT NOTE : in the code some length checks are missing because certain
+ instructions are called under the assumption that a certain number of bytes
+ follow because it has already been garanteed before parsing the instructions.
+ They just have to "refill" this credit if they consume extra bytes. This is
+ an implementation design choice independant on the algorithm or encoding.
+
+Byte sequences
+
+ First byte encoding :
+
+ 0..17 : follow regular instruction encoding, see below. It is worth
+ noting that codes 16 and 17 will represent a block copy from
+ the dictionary which is empty, and that they will always be
+ invalid at this place.
+
+ 18..21 : copy 0..3 literals
+ state = (byte - 17) = 0..3 [ copy <state> literals ]
+ skip byte
+
+ 22..255 : copy literal string
+ length = (byte - 17) = 4..238
+ state = 4 [ don't copy extra literals ]
+ skip byte
+
+ Instruction encoding :
+
+ 0 0 0 0 X X X X (0..15)
+ Depends on the number of literals copied by the last instruction.
+ If last instruction did not copy any literal (state == 0), this
+ encoding will be a copy of 4 or more literal, and must be interpreted
+ like this :
+
+ 0 0 0 0 L L L L (0..15) : copy long literal string
+ length = 3 + (L ?: 15 + (zero_bytes * 255) + non_zero_byte)
+ state = 4 (no extra literals are copied)
+
+ If last instruction used to copy between 1 to 3 literals (encoded in
+ the instruction's opcode or distance), the instruction is a copy of a
+ 2-byte block from the dictionary within a 1kB distance. It is worth
+ noting that this instruction provides little savings since it uses 2
+ bytes to encode a copy of 2 other bytes but it encodes the number of
+ following literals for free. It must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 2 bytes from <= 1kB distance
+ length = 2
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 1
+
+ If last instruction used to copy 4 or more literals (as detected by
+ state == 4), the instruction becomes a copy of a 3-byte block from the
+ dictionary from a 2..3kB distance, and must be interpreted like this :
+
+ 0 0 0 0 D D S S (0..15) : copy 3 bytes from 2..3 kB distance
+ length = 3
+ state = S (copy S literals after this block)
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 2) + D + 2049
+
+ 0 0 0 1 H L L L (16..31)
+ Copy of a block within 16..48kB distance (preferably less than 10B)
+ length = 2 + (L ?: 7 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = 16384 + (H << 14) + D
+ state = S (copy S literals after this block)
+ End of stream is reached if distance == 16384
+
+ 0 0 1 L L L L L (32..63)
+ Copy of small block within 16kB distance (preferably less than 34B)
+ length = 2 + (L ?: 31 + (zero_bytes * 255) + non_zero_byte)
+ Always followed by exactly one LE16 : D D D D D D D D : D D D D D D S S
+ distance = D + 1
+ state = S (copy S literals after this block)
+
+ 0 1 L D D D S S (64..127)
+ Copy 3-4 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 3 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+ 1 L L D D D S S (128..255)
+ Copy 5-8 bytes from block within 2kB distance
+ state = S (copy S literals after this block)
+ length = 5 + L
+ Always followed by exactly one byte : H H H H H H H H
+ distance = (H << 3) + D + 1
+
+Authors
+
+ This document was written by Willy Tarreau <w@1wt.eu> on 2014/07/19 during an
+ analysis of the decompression code available in Linux 3.16-rc5. The code is
+ tricky, it is possible that this document contains mistakes or that a few
+ corner cases were overlooked. In any case, please report any doubt, fix, or
+ proposed updates to the author(s) so that the document can be updated.
diff --git a/Documentation/mailbox.txt b/Documentation/mailbox.txt
new file mode 100644
index 000000000000..60f43ff629aa
--- /dev/null
+++ b/Documentation/mailbox.txt
@@ -0,0 +1,122 @@
+ The Common Mailbox Framework
+ Jassi Brar <jaswinder.singh@linaro.org>
+
+ This document aims to help developers write client and controller
+drivers for the API. But before we start, let us note that the
+client (especially) and controller drivers are likely going to be
+very platform specific because the remote firmware is likely to be
+proprietary and implement non-standard protocol. So even if two
+platforms employ, say, PL320 controller, the client drivers can't
+be shared across them. Even the PL320 driver might need to accommodate
+some platform specific quirks. So the API is meant mainly to avoid
+similar copies of code written for each platform. Having said that,
+nothing prevents the remote f/w to also be Linux based and use the
+same api there. However none of that helps us locally because we only
+ever deal at client's protocol level.
+ Some of the choices made during implementation are the result of this
+peculiarity of this "common" framework.
+
+
+
+ Part 1 - Controller Driver (See include/linux/mailbox_controller.h)
+
+ Allocate mbox_controller and the array of mbox_chan.
+Populate mbox_chan_ops, except peek_data() all are mandatory.
+The controller driver might know a message has been consumed
+by the remote by getting an IRQ or polling some hardware flag
+or it can never know (the client knows by way of the protocol).
+The method in order of preference is IRQ -> Poll -> None, which
+the controller driver should set via 'txdone_irq' or 'txdone_poll'
+or neither.
+
+
+ Part 2 - Client Driver (See include/linux/mailbox_client.h)
+
+ The client might want to operate in blocking mode (synchronously
+send a message through before returning) or non-blocking/async mode (submit
+a message and a callback function to the API and return immediately).
+
+
+struct demo_client {
+ struct mbox_client cl;
+ struct mbox_chan *mbox;
+ struct completion c;
+ bool async;
+ /* ... */
+};
+
+/*
+ * This is the handler for data received from remote. The behaviour is purely
+ * dependent upon the protocol. This is just an example.
+ */
+static void message_from_remote(struct mbox_client *cl, void *mssg)
+{
+ struct demo_client *dc = container_of(mbox_client,
+ struct demo_client, cl);
+ if (dc->aysnc) {
+ if (is_an_ack(mssg)) {
+ /* An ACK to our last sample sent */
+ return; /* Or do something else here */
+ } else { /* A new message from remote */
+ queue_req(mssg);
+ }
+ } else {
+ /* Remote f/w sends only ACK packets on this channel */
+ return;
+ }
+}
+
+static void sample_sent(struct mbox_client *cl, void *mssg, int r)
+{
+ struct demo_client *dc = container_of(mbox_client,
+ struct demo_client, cl);
+ complete(&dc->c);
+}
+
+static void client_demo(struct platform_device *pdev)
+{
+ struct demo_client *dc_sync, *dc_async;
+ /* The controller already knows async_pkt and sync_pkt */
+ struct async_pkt ap;
+ struct sync_pkt sp;
+
+ dc_sync = kzalloc(sizeof(*dc_sync), GFP_KERNEL);
+ dc_async = kzalloc(sizeof(*dc_async), GFP_KERNEL);
+
+ /* Populate non-blocking mode client */
+ dc_async->cl.dev = &pdev->dev;
+ dc_async->cl.rx_callback = message_from_remote;
+ dc_async->cl.tx_done = sample_sent;
+ dc_async->cl.tx_block = false;
+ dc_async->cl.tx_tout = 0; /* doesn't matter here */
+ dc_async->cl.knows_txdone = false; /* depending upon protocol */
+ dc_async->async = true;
+ init_completion(&dc_async->c);
+
+ /* Populate blocking mode client */
+ dc_sync->cl.dev = &pdev->dev;
+ dc_sync->cl.rx_callback = message_from_remote;
+ dc_sync->cl.tx_done = NULL; /* operate in blocking mode */
+ dc_sync->cl.tx_block = true;
+ dc_sync->cl.tx_tout = 500; /* by half a second */
+ dc_sync->cl.knows_txdone = false; /* depending upon protocol */
+ dc_sync->async = false;
+
+ /* ASync mailbox is listed second in 'mboxes' property */
+ dc_async->mbox = mbox_request_channel(&dc_async->cl, 1);
+ /* Populate data packet */
+ /* ap.xxx = 123; etc */
+ /* Send async message to remote */
+ mbox_send_message(dc_async->mbox, &ap);
+
+ /* Sync mailbox is listed first in 'mboxes' property */
+ dc_sync->mbox = mbox_request_channel(&dc_sync->cl, 0);
+ /* Populate data packet */
+ /* sp.abc = 123; etc */
+ /* Send message to remote in blocking mode */
+ mbox_send_message(dc_sync->mbox, &sp);
+ /* At this point 'sp' has been sent */
+
+ /* Now wait for async chan to be done */
+ wait_for_completion(&dc_async->c);
+}
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 3458d6343e01..a59ee432a98f 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -478,6 +478,15 @@ tcp_syn_retries - INTEGER
tcp_timestamps - BOOLEAN
Enable timestamps as defined in RFC1323.
+tcp_min_tso_segs - INTEGER
+ Minimal number of segments per TSO frame.
+ Since linux-3.12, TCP does an automatic sizing of TSO frames,
+ depending on flow rate, instead of filling 64Kbytes packets.
+ For specific usages, it's possible to force TCP to build big
+ TSO frames. Note that TCP stack might split too big TSO packets
+ if available window is too small.
+ Default: 2
+
tcp_tso_win_divisor - INTEGER
This allows control over what percentage of the congestion window
can be consumed by a single TSO frame.
@@ -562,9 +571,6 @@ tcp_limit_output_bytes - INTEGER
typical pfifo_fast qdiscs.
tcp_limit_output_bytes limits the number of bytes on qdisc
or device to reduce artificial RTT/cwnd and reduce bufferbloat.
- Note: For GSO/TSO enabled flows, we try to have at least two
- packets in flight. Reducing tcp_limit_output_bytes might also
- reduce the size of individual GSO packet (64KB being the max)
Default: 131072
tcp_challenge_ack_limit - INTEGER
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 23dd80e82b8e..0f4376ec8852 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -123,6 +123,16 @@ Transmission process is similar to capture as shown below.
[shutdown] close() --------> destruction of the transmission socket and
deallocation of all associated resources.
+Socket creation and destruction is also straight forward, and is done
+the same way as in capturing described in the previous paragraph:
+
+ int fd = socket(PF_PACKET, mode, 0);
+
+The protocol can optionally be 0 in case we only want to transmit
+via this socket, which avoids an expensive call to packet_rcv().
+In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
+set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
+
Binding the socket to your network interface is mandatory (with zero copy) to
know the header size of frames used in the circular buffer.
diff --git a/Documentation/parisc/registers b/Documentation/parisc/registers
index dd3caddd1ad9..10c7d1730f5d 100644
--- a/Documentation/parisc/registers
+++ b/Documentation/parisc/registers
@@ -78,6 +78,14 @@ Shadow Registers used by interruption handler code
TOC enable bit 1
=========================================================================
+
+The PA-RISC architecture defines 7 registers as "shadow registers".
+Those are used in RETURN FROM INTERRUPTION AND RESTORE instruction to reduce
+the state save and restore time by eliminating the need for general register
+(GR) saves and restores in interruption handlers.
+Shadow registers are the GRs 1, 8, 9, 16, 17, 24, and 25.
+
+=========================================================================
Register usage notes, originally from John Marvin, with some additional
notes from Randolph Chung.
diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt
index 447fd4cd54ec..c8763806c65e 100644
--- a/Documentation/pinctrl.txt
+++ b/Documentation/pinctrl.txt
@@ -203,15 +203,8 @@ using a certain resistor value - pull up and pull down - so that the pin has a
stable value when nothing is driving the rail it is connected to, or when it's
unconnected.
-Pin configuration can be programmed either using the explicit APIs described
-immediately below, or by adding configuration entries into the mapping table;
-see section "Board/machine configuration" below.
-
-For example, a platform may do the following to pull up a pin to VDD:
-
-#include <linux/pinctrl/consumer.h>
-
-ret = pin_config_set("foo-dev", "FOO_GPIO_PIN", PLATFORM_X_PULL_UP);
+Pin configuration can be programmed by adding configuration entries into the
+mapping table; see section "Board/machine configuration" below.
The format and meaning of the configuration parameter, PLATFORM_X_PULL_UP
above, is entirely defined by the pin controller driver.
diff --git a/Documentation/sound/alsa/ALSA-Configuration.txt b/Documentation/sound/alsa/ALSA-Configuration.txt
index 95731a08f257..8f08b2a71791 100644
--- a/Documentation/sound/alsa/ALSA-Configuration.txt
+++ b/Documentation/sound/alsa/ALSA-Configuration.txt
@@ -2026,8 +2026,8 @@ Prior to version 0.9.0rc4 options had a 'snd_' prefix. This was removed.
-------------------
Module for sound cards based on the Asus AV66/AV100/AV200 chips,
- i.e., Xonar D1, DX, D2, D2X, DS, Essence ST (Deluxe), Essence STX,
- HDAV1.3 (Deluxe), and HDAV1.3 Slim.
+ i.e., Xonar D1, DX, D2, D2X, DS, DSX, Essence ST (Deluxe),
+ Essence STX (II), HDAV1.3 (Deluxe), and HDAV1.3 Slim.
This module supports autoprobe and multiple cards.
diff --git a/Documentation/stable_kernel_rules.txt b/Documentation/stable_kernel_rules.txt
index b0714d8f678a..8dfb6a5f427d 100644
--- a/Documentation/stable_kernel_rules.txt
+++ b/Documentation/stable_kernel_rules.txt
@@ -29,6 +29,9 @@ Rules on what kind of patches are accepted, and which ones are not, into the
Procedure for submitting patches to the -stable tree:
+ - If the patch covers files in net/ or drivers/net please follow netdev stable
+ submission guidelines as described in
+ Documentation/networking/netdev-FAQ.txt
- Send the patch, after verifying that it follows the above rules, to
stable@vger.kernel.org. You must note the upstream commit ID in the
changelog of your submission, as well as the kernel version you wish
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index ccd42589e124..8d90c42e5db6 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -289,13 +289,24 @@ Default value is "/sbin/hotplug".
kptr_restrict:
This toggle indicates whether restrictions are placed on
-exposing kernel addresses via /proc and other interfaces. When
-kptr_restrict is set to (0), there are no restrictions. When
-kptr_restrict is set to (1), the default, kernel pointers
-printed using the %pK format specifier will be replaced with 0's
-unless the user has CAP_SYSLOG. When kptr_restrict is set to
-(2), kernel pointers printed using %pK will be replaced with 0's
-regardless of privileges.
+exposing kernel addresses via /proc and other interfaces.
+
+When kptr_restrict is set to (0), the default, there are no restrictions.
+
+When kptr_restrict is set to (1), kernel pointers printed using the %pK
+format specifier will be replaced with 0's unless the user has CAP_SYSLOG
+and effective user and group ids are equal to the real ids. This is
+because %pK checks are done at read() time rather than open() time, so
+if permissions are elevated between the open() and the read() (e.g via
+a setuid binary) then %pK will not leak kernel pointers to unprivileged
+users. Note, this is a temporary solution only. The correct long-term
+solution is to do the permission checks at open() time. Consider removing
+world read permissions from files that use %pK, and using dmesg_restrict
+to protect against uses of %pK in dmesg(8) if leaking kernel pointer
+values to unprivileged users is a concern.
+
+When kptr_restrict is set to (2), kernel pointers printed using
+%pK will be replaced with 0's regardless of privileges.
==============================================================
@@ -427,6 +438,32 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled.
==============================================================
+perf_cpu_time_max_percent:
+
+Hints to the kernel how much CPU time it should be allowed to
+use to handle perf sampling events. If the perf subsystem
+is informed that its samples are exceeding this limit, it
+will drop its sampling frequency to attempt to reduce its CPU
+usage.
+
+Some perf sampling happens in NMIs. If these samples
+unexpectedly take too long to execute, the NMIs can become
+stacked up next to each other so much that nothing else is
+allowed to execute.
+
+0: disable the mechanism. Do not monitor or correct perf's
+ sampling rate no matter how CPU time it takes.
+
+1-100: attempt to throttle perf's sample rate to this
+ percentage of CPU. Note: the kernel calculates an
+ "expected" length of each sample event. 100 here means
+ 100% of that expected length. Even if this is set to
+ 100, you may still see sample throttling if this
+ length is exceeded. Set to 0 if you truly do not care
+ how much CPU is consumed.
+
+==============================================================
+
pid_max:
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index a71bd5b90fe8..37c54863f611 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -142,6 +142,11 @@ temperature) and throttle appropriate devices.
This is an optional feature where some platforms can choose not to
provide this data.
.governor_name: Name of the thermal governor used for this zone
+ .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface
+ is required. when no_hwmon == false, a hwmon sysfs interface
+ will be created. when no_hwmon == true, nothing will be done.
+ In case the thermal_zone_params is NULL, the hwmon interface
+ will be created (for backward compatibility).
.num_tbps: Number of thermal_bind_params entries for this zone
.tbp: thermal_bind_params entries
diff --git a/Documentation/video4linux/gspca.txt b/Documentation/video4linux/gspca.txt
index 1e6b6531bbcc..d2ba80bb7af5 100644
--- a/Documentation/video4linux/gspca.txt
+++ b/Documentation/video4linux/gspca.txt
@@ -55,6 +55,7 @@ zc3xx 0458:700f Genius VideoCam Web V2
sonixj 0458:7025 Genius Eye 311Q
sn9c20x 0458:7029 Genius Look 320s
sonixj 0458:702e Genius Slim 310 NB
+sn9c20x 0458:7045 Genius Look 1320 V2
sn9c20x 0458:704a Genius Slim 1320
sn9c20x 0458:704c Genius i-Look 1321
sn9c20x 045e:00f4 LifeCam VX-6000 (SN9C20x + OV9650)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 5f91eda91647..257a1f1eecc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl.
4.4 KVM_CHECK_EXTENSION
-Capability: basic
+Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl
Architectures: all
-Type: system ioctl
+Type: system ioctl, vm ioctl
Parameters: extension identifier (KVM_CAP_*)
Returns: 0 if unsupported; 1 (or some other positive integer) if supported
@@ -160,6 +160,9 @@ receives an integer that describes the extension availability.
Generally 0 means no and 1 means yes, but some extensions may report
additional information in the integer return value.
+Based on their initialization different VMs may have different capabilities.
+It is thus encouraged to use the vm ioctl to query for capabilities (available
+with KVM_CAP_CHECK_EXTENSION_VM on the vm fd)
4.5 KVM_GET_VCPU_MMAP_SIZE
@@ -280,7 +283,7 @@ kvm_run' (see below).
4.11 KVM_GET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (out)
Returns: 0 on success, -1 on error
@@ -301,7 +304,7 @@ struct kvm_regs {
4.12 KVM_SET_REGS
Capability: basic
-Architectures: all except ARM
+Architectures: all except ARM, arm64
Type: vcpu ioctl
Parameters: struct kvm_regs (in)
Returns: 0 on success, -1 on error
@@ -587,7 +590,7 @@ struct kvm_fpu {
4.24 KVM_CREATE_IRQCHIP
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, ARM
+Architectures: x86, ia64, ARM, arm64
Type: vm ioctl
Parameters: none
Returns: 0 on success, -1 on error
@@ -595,14 +598,14 @@ Returns: 0 on success, -1 on error
Creates an interrupt controller model in the kernel. On x86, creates a virtual
ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
local APIC. IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM, a GIC is
+only go to the IOAPIC. On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
created.
4.25 KVM_IRQ_LINE
Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm
+Architectures: x86, ia64, arm, arm64
Type: vm ioctl
Parameters: struct kvm_irq_level
Returns: 0 on success, -1 on error
@@ -612,9 +615,10 @@ On some architectures it is required that an interrupt controller model has
been previously created with KVM_CREATE_IRQCHIP. Note that edge-triggered
interrupts require the level to be set to 1 and then back to 0.
-ARM can signal an interrupt either at the CPU level, or at the in-kernel irqchip
-(GIC), and for in-kernel irqchip can tell the GIC to use PPIs designated for
-specific cpus. The irq field is interpreted like this:
+ARM/arm64 can signal an interrupt either at the CPU level, or at the
+in-kernel irqchip (GIC), and for in-kernel irqchip can tell the GIC to
+use PPIs designated for specific cpus. The irq field is interpreted
+like this:
 bits: | 31 ... 24 | 23 ... 16 | 15 ... 0 |
field: | irq_type | vcpu_index | irq_id |
@@ -968,18 +972,20 @@ uniprocessor guests).
Possible values are:
- - KVM_MP_STATE_RUNNABLE: the vcpu is currently running
+ - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64]
- KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
- which has not yet received an INIT signal
+ which has not yet received an INIT signal [x86,
+ ia64]
- KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
- now ready for a SIPI
+ now ready for a SIPI [x86, ia64]
- KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
- is waiting for an interrupt
+ is waiting for an interrupt [x86, ia64]
- KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
- accessible via KVM_GET_VCPU_EVENTS)
+ accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.39 KVM_SET_MP_STATE
@@ -993,8 +999,9 @@ Returns: 0 on success; -1 on error
Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
arguments.
-This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
-irqchip, the multiprocessing state must be maintained by userspace.
+On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+in-kernel irqchip, the multiprocessing state must be maintained by userspace on
+these architectures.
4.40 KVM_SET_IDENTITY_MAP_ADDR
@@ -1121,9 +1128,9 @@ struct kvm_cpuid2 {
struct kvm_cpuid_entry2 entries[0];
};
-#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX 1
-#define KVM_CPUID_FLAG_STATEFUL_FUNC 2
-#define KVM_CPUID_FLAG_STATE_READ_NEXT 4
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
struct kvm_cpuid_entry2 {
__u32 function;
@@ -1831,6 +1838,22 @@ ARM 32-bit VFP control registers have the following id bit patterns:
ARM 64-bit FP registers have the following id bit patterns:
0x4030 0000 0012 0 <regno:12>
+
+arm64 registers are mapped using the lower 32 bits. The upper 16 of
+that is the register group type, or coprocessor number:
+
+arm64 core/FP-SIMD registers have the following id bit patterns. Note
+that the size of the access is variable, as the kvm_regs structure
+contains elements ranging from 32 to 128 bits. The index is a 32bit
+value in the kvm_regs structure seen as a 32bit array.
+ 0x60x0 0000 0010 <index into the kvm_regs struct:16>
+
+arm64 CCSIDR registers are demultiplexed by CSSELR value:
+ 0x6020 0000 0011 00 <csselr:8>
+
+arm64 system registers have the following id bit patterns:
+ 0x6030 0000 0013 <op0:2> <op1:3> <crn:4> <crm:4> <op2:3>
+
4.69 KVM_GET_ONE_REG
Capability: KVM_CAP_ONE_REG
@@ -2264,7 +2287,7 @@ current state. "addr" is ignored.
4.77 KVM_ARM_VCPU_INIT
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct struct kvm_vcpu_init (in)
Returns: 0 on success; -1 on error
@@ -2283,12 +2306,14 @@ should be created before this ioctl is invoked.
Possible features:
- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
Depends on KVM_CAP_ARM_PSCI.
+ - KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
+ Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
4.78 KVM_GET_REG_LIST
Capability: basic
-Architectures: arm
+Architectures: arm, arm64
Type: vcpu ioctl
Parameters: struct kvm_reg_list (in/out)
Returns: 0 on success; -1 on error
@@ -2305,10 +2330,10 @@ This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-4.80 KVM_ARM_SET_DEVICE_ADDR
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
-Architectures: arm
+Architectures: arm, arm64
Type: vm ioctl
Parameters: struct kvm_arm_device_address (in)
Returns: 0 on success, -1 on error
@@ -2329,20 +2354,25 @@ can access emulated or directly exposed devices, which the host kernel needs
to know about. The id field is an architecture specific identifier for a
specific device.
-ARM divides the id field into two parts, a device id and an address type id
-specific to the individual device.
+ARM/arm64 divides the id field into two parts, a device id and an
+address type id specific to the individual device.
 bits: | 63 ... 32 | 31 ... 16 | 15 ... 0 |
field: | 0x00000000 | device id | addr type id |
-ARM currently only require this when using the in-kernel GIC support for the
-hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2 as the device id. When
-setting the base address for the guest's mapping of the VGIC virtual CPU
-and distributor interface, the ioctl must be called after calling
-KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling
-this ioctl twice for any of the base addresses will return -EEXIST.
+ARM/arm64 currently only require this when using the in-kernel GIC
+support for the hardware VGIC features, using KVM_ARM_DEVICE_VGIC_V2
+as the device id. When setting the base address for the guest's
+mapping of the VGIC virtual CPU and distributor interface, the ioctl
+must be called after calling KVM_CREATE_IRQCHIP, but before calling
+KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
+base addresses will return -EEXIST.
+
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
-4.82 KVM_PPC_RTAS_DEFINE_TOKEN
+
+4.86 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
Architectures: ppc
@@ -2612,6 +2642,21 @@ It gets triggered whenever both KVM_CAP_PPC_EPR are enabled and an
external interrupt has just been delivered into the guest. User space
should put the acknowledged interrupt vector into the 'epr' field.
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+ __u32 type;
+ __u64 flags;
+ } system_event;
+
+If exit_reason is KVM_EXIT_SYSTEM_EVENT then the vcpu has triggered
+a system-level event using some architecture specific mechanism (hypercall
+or some special instruction). In case of ARM/ARM64, this is triggered using
+HVC instruction based PSCI call from the vcpu. The 'type' field describes
+the system-level event type. The 'flags' field describes architecture
+specific flags for the system-level event.
+
/* Fix the size of the union. */
char padding[256];
};
@@ -2641,6 +2686,77 @@ and usually define the validity of a groups of registers. (e.g. one bit
};
+4.81 KVM_GET_EMULATED_CPUID
+
+Capability: KVM_CAP_EXT_EMUL_CPUID
+Architectures: x86
+Type: system ioctl
+Parameters: struct kvm_cpuid2 (in/out)
+Returns: 0 on success, -1 on error
+
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 flags;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+The member 'flags' is used for passing flags from userspace.
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX BIT(0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC BIT(1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT BIT(2)
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+This ioctl returns x86 cpuid features which are emulated by
+kvm.Userspace can use the information returned by this ioctl to query
+which features are emulated by kvm instead of being present natively.
+
+Userspace invokes KVM_GET_EMULATED_CPUID by passing a kvm_cpuid2
+structure with the 'nent' field indicating the number of entries in
+the variable-size array 'entries'. If the number of entries is too low
+to describe the cpu capabilities, an error (E2BIG) is returned. If the
+number is too high, the 'nent' field is adjusted and an error (ENOMEM)
+is returned. If the number is just right, the 'nent' field is adjusted
+to the number of valid entries in the 'entries' array, which is then
+filled.
+
+The entries returned are the set CPUID bits of the respective features
+which kvm emulates, as returned by the CPUID instruction, with unknown
+or unsupported feature bits cleared.
+
+Features like x2apic, for example, may not be present in the host cpu
+but are exposed by kvm in KVM_GET_SUPPORTED_CPUID because they can be
+emulated efficiently and thus not included here.
+
+The fields in each entry are defined as follows:
+
+ function: the eax value used to obtain the entry
+ index: the ecx value used to obtain the entry (for entries that are
+ affected by ecx)
+ flags: an OR of zero or more of the following:
+ KVM_CPUID_FLAG_SIGNIFCANT_INDEX:
+ if the index field is valid
+ KVM_CPUID_FLAG_STATEFUL_FUNC:
+ if cpuid for this function returns different values for successive
+ invocations; there will be several entries with the same function,
+ all with this flag set
+ KVM_CPUID_FLAG_STATE_READ_NEXT:
+ for KVM_CPUID_FLAG_STATEFUL_FUNC entries, set if this entry is
+ the first entry to be read by a cpu
+ eax, ebx, ecx, edx: the values returned by the cpuid instruction for
+ this function/index combination
+
+
6. Capabilities that can be enabled
-----------------------------------
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..df8b0c7540b6
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,83 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+ KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_ADDR
+ Attributes:
+ KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GIC distributor
+ register mappings.
+
+ KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+ Base address in the guest physical address space of the GIC virtual cpu
+ interface register mappings.
+
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All distributor regs are (rw, 32-bit)
+
+ The offset is relative to the "Distributor base address" as defined in the
+ GICv2 specs. Getting or setting such a register has the same effect as
+ reading or writing the register on the actual hardware from the cpu
+ specified with cpu id field. Note that most distributor fields are not
+ banked, but return the same value regardless of the cpu id used to access
+ the register.
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All CPU interface regs are (rw, 32-bit)
+
+ The offset specifies the offset from the "CPU interface base address" as
+ defined in the GICv2 specs. Getting or setting such a register has the
+ same effect as reading or writing the register on the actual hardware.
+
+ The Active Priorities Registers APRn are implementation defined, so we set a
+ fixed format for our implementation that fits with the model of a "GICv2
+ implementation without the security extensions" which we present to the
+ guest. This interface always exposes four register APR[0-3] describing the
+ maximum possible 128 preemption levels. The semantics of the register
+ indicate if any interrupts in a given preemption level are in the active
+ state by setting the corresponding bit.
+
+ Thus, preemption level X has one or more active interrupts if and only if:
+
+ APRn[X mod 32] == 0b1, where n = X / 32
+
+ Bits for undefined preemption levels are RAZ/WI.
+
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_NR_IRQS
+ Attributes:
+ A value describing the number of interrupts (SGI, PPI and SPI) for
+ this GIC instance, ranging from 64 to 1024, in increments of 32.
+
+ Errors:
+ -EINVAL: Value set is out of the expected range
+ -EBUSY: Value has already be set, or GIC has already been initialized
+ with default values.
diff --git a/Documentation/virtual/kvm/devices/vfio.txt b/Documentation/virtual/kvm/devices/vfio.txt
new file mode 100644
index 000000000000..ef51740c67ca
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/vfio.txt
@@ -0,0 +1,22 @@
+VFIO virtual device
+===================
+
+Device types supported:
+ KVM_DEV_TYPE_VFIO
+
+Only one VFIO instance may be created per VM. The created device
+tracks VFIO groups in use by the VM and features of those groups
+important to the correctness and acceleration of the VM. As groups
+are enabled and disabled for use by the VM, KVM should be updated
+about their presence. When registered with KVM, a reference to the
+VFIO-group is held by KVM.
+
+Groups:
+ KVM_DEV_VFIO_GROUP
+
+KVM_DEV_VFIO_GROUP attributes:
+ KVM_DEV_VFIO_GROUP_ADD: Add a VFIO group to VFIO-KVM device tracking
+ KVM_DEV_VFIO_GROUP_DEL: Remove a VFIO group from VFIO-KVM device tracking
+
+For each, kvm_device_attr.addr points to an int32_t file descriptor
+for the VFIO group.
diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt
index 41b7ac9884b5..ba035c33d01c 100644
--- a/Documentation/virtual/kvm/locking.txt
+++ b/Documentation/virtual/kvm/locking.txt
@@ -132,10 +132,14 @@ See the comments in spte_has_volatile_bits() and mmu_spte_update().
------------
Name: kvm_lock
-Type: raw_spinlock
+Type: spinlock_t
Arch: any
Protects: - vm_list
- - hardware virtualization enable/disable
+
+Name: kvm_count_lock
+Type: raw_spinlock_t
+Arch: any
+Protects: - hardware virtualization enable/disable
Comment: 'raw' because hardware enabling/disabling must be atomic /wrt
migration.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 881582f75c9c..bd4370487b07 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -12,6 +12,8 @@ ffffc90000000000 - ffffe8ffffffffff (=45 bits) vmalloc/ioremap space
ffffe90000000000 - ffffe9ffffffffff (=40 bits) hole
ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
... unused hole ...
+ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
+... unused hole ...
ffffffff80000000 - ffffffffa0000000 (=512 MB) kernel text mapping, from phys 0
ffffffffa0000000 - ffffffffff5fffff (=1525 MB) module mapping space
ffffffffff600000 - ffffffffffdfffff (=8 MB) vsyscalls
diff --git a/Documentation/zh_CN/HOWTO b/Documentation/zh_CN/HOWTO
index 7fba5aab9ef9..7599eb38b764 100644
--- a/Documentation/zh_CN/HOWTO
+++ b/Documentation/zh_CN/HOWTO
@@ -237,7 +237,7 @@ kernel.org网站的pub/linux/kernel/v2.6/目录下找到它。它的开发遵循
如果没有2.6.x.y版本内核存在,那么最新的2.6.x版本内核就相当于是当前的稳定
版内核。
-2.6.x.y版本由“稳定版”小组(邮件地址<stable@kernel.org>)维护,一般隔周发
+2.6.x.y版本由“稳定版”小组(邮件地址<stable@vger.kernel.org>)维护,一般隔周发
布新版本。
内核源码中的Documentation/stable_kernel_rules.txt文件具体描述了可被稳定
diff --git a/Documentation/zh_CN/stable_kernel_rules.txt b/Documentation/zh_CN/stable_kernel_rules.txt
index b5b9b0ab02fd..26ea5ed7cd9c 100644
--- a/Documentation/zh_CN/stable_kernel_rules.txt
+++ b/Documentation/zh_CN/stable_kernel_rules.txt
@@ -42,7 +42,7 @@ Documentation/stable_kernel_rules.txt 的中文翻译
向稳定版代码树提交补丁的过程:
- - 在确认了补丁符合以上的规则后,将补丁发送到stable@kernel.org。
+ - 在确认了补丁符合以上的规则后,将补丁发送到stable@vger.kernel.org。
- 如果补丁被接受到队列里,发送者会收到一个ACK回复,如果没有被接受,收
到的是NAK回复。回复需要几天的时间,这取决于开发者的时间安排。
- 被接受的补丁会被加到稳定版本队列里,等待其他开发者的审查。