aboutsummaryrefslogtreecommitdiff
path: root/Documentation/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/virt/kvm')
-rw-r--r--Documentation/virt/kvm/api.rst68
-rw-r--r--Documentation/virt/kvm/locking.rst14
2 files changed, 56 insertions, 26 deletions
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index dae68e68ca23..a6729c8cf063 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3357,6 +3357,7 @@ flags which can include the following:
- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
- KVM_GUESTDBG_EXIT_PENDING: trigger an immediate guest exit [s390]
+ - KVM_GUESTDBG_BLOCKIRQ: avoid injecting interrupts/NMI/SMI [x86]
For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
are enabled in memory so we need to ensure breakpoint exceptions are
@@ -5077,7 +5078,7 @@ of bytes successfully copied is returned. If the call completes successfully
then ``length`` is returned.
4.131 KVM_GET_SREGS2
-------------------
+--------------------
:Capability: KVM_CAP_SREGS2
:Architectures: x86
@@ -5090,17 +5091,17 @@ This ioctl (when supported) replaces the KVM_GET_SREGS.
::
-struct kvm_sregs2 {
- /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
- struct kvm_segment cs, ds, es, fs, gs, ss;
- struct kvm_segment tr, ldt;
- struct kvm_dtable gdt, idt;
- __u64 cr0, cr2, cr3, cr4, cr8;
- __u64 efer;
- __u64 apic_base;
- __u64 flags;
- __u64 pdptrs[4];
-};
+ struct kvm_sregs2 {
+ /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 flags;
+ __u64 pdptrs[4];
+ };
flags values for ``kvm_sregs2``:
@@ -5110,7 +5111,7 @@ flags values for ``kvm_sregs2``:
4.132 KVM_SET_SREGS2
-------------------
+--------------------
:Capability: KVM_CAP_SREGS2
:Architectures: x86
@@ -5201,12 +5202,16 @@ trailing ``'\0'``, is indicated by the ``name_size`` field in the header.
The descriptors block is only needed to be read once for the lifetime of the
file descriptor contains a sequence of ``struct kvm_stats_desc``, each followed
by a string of size ``name_size``.
+::
#define KVM_STATS_TYPE_SHIFT 0
#define KVM_STATS_TYPE_MASK (0xF << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT)
#define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST
#define KVM_STATS_UNIT_SHIFT 4
#define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT)
@@ -5214,18 +5219,20 @@ by a string of size ``name_size``.
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
+ #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
#define KVM_STATS_BASE_POW10 (0x0 << KVM_STATS_BASE_SHIFT)
#define KVM_STATS_BASE_POW2 (0x1 << KVM_STATS_BASE_SHIFT)
+ #define KVM_STATS_BASE_MAX KVM_STATS_BASE_POW2
struct kvm_stats_desc {
__u32 flags;
__s16 exponent;
__u16 size;
__u32 offset;
- __u32 unused;
+ __u32 bucket_size;
char name[];
};
@@ -5234,24 +5241,40 @@ by this descriptor. Its endianness is CPU native.
The following flags are supported:
Bits 0-3 of ``flags`` encode the type:
+
* ``KVM_STATS_TYPE_CUMULATIVE``
- The statistics data is cumulative. The value of data can only be increased.
+ The statistics reports a cumulative count. The value of data can only be increased.
Most of the counters used in KVM are of this type.
The corresponding ``size`` field for this type is always 1.
All cumulative statistics data are read/write.
* ``KVM_STATS_TYPE_INSTANT``
- The statistics data is instantaneous. Its value can be increased or
+ The statistics reports an instantaneous value. Its value can be increased or
decreased. This type is usually used as a measurement of some resources,
like the number of dirty pages, the number of large pages, etc.
All instant statistics are read only.
The corresponding ``size`` field for this type is always 1.
* ``KVM_STATS_TYPE_PEAK``
- The statistics data is peak. The value of data can only be increased, and
- represents a peak value for a measurement, for example the maximum number
+ The statistics data reports a peak value, for example the maximum number
of items in a hash table bucket, the longest time waited and so on.
+ The value of data can only be increased.
The corresponding ``size`` field for this type is always 1.
+ * ``KVM_STATS_TYPE_LINEAR_HIST``
+ The statistic is reported as a linear histogram. The number of
+ buckets is specified by the ``size`` field. The size of buckets is specified
+ by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
+ is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
+ bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
+ value.) The bucket value indicates how many samples fell in the bucket's range.
+ * ``KVM_STATS_TYPE_LOG_HIST``
+ The statistic is reported as a logarithmic histogram. The number of
+ buckets is specified by the ``size`` field. The range of the first bucket is
+ [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
+ Otherwise, The Nth bucket (1 < N < ``size``) covers
+ [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
+ in the bucket's range.
Bits 4-7 of ``flags`` encode the unit:
+
* ``KVM_STATS_UNIT_NONE``
There is no unit for the value of statistics data. This usually means that
the value is a simple counter of an event.
@@ -5266,6 +5289,7 @@ Bits 4-7 of ``flags`` encode the unit:
Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
unit:
+
* ``KVM_STATS_BASE_POW10``
The scale is based on power of 10. It is used for measurement of time and
CPU clock cycles. For example, an exponent of -9 can be used with
@@ -5282,9 +5306,9 @@ unsigned 64bit data.
The ``offset`` field is the offset from the start of Data Block to the start of
the corresponding statistics data.
-The ``unused`` field is reserved for future support for other types of
-statistics data, like log/linear histogram. Its value is always 0 for the types
-defined above.
+The ``bucket_size`` field is used as a parameter for histogram statistics data.
+It is only used by linear histogram statistics data, specifying the size of a
+bucket.
The ``name`` field is the name string of the statistics data. The name string
starts at the end of ``struct kvm_stats_desc``. The maximum length including
@@ -7213,7 +7237,7 @@ supported in the host. A VMM can check whether the service is
available to the guest on migration.
8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
------------------------------
+---------------------------------
Architectures: x86
diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index 35eca377543d..5d27da356836 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -21,14 +21,20 @@ The acquisition orders for mutexes are as follows:
can be taken inside a kvm->srcu read-side critical section,
while kvm->slots_lock cannot.
+- kvm->mn_active_invalidate_count ensures that pairs of
+ invalidate_range_start() and invalidate_range_end() callbacks
+ use the same memslots array. kvm->slots_lock and kvm->slots_arch_lock
+ are taken on the waiting side in install_new_memslots, so MMU notifiers
+ must not take either kvm->slots_lock or kvm->slots_arch_lock.
+
On x86:
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
-- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
- taken inside kvm->arch.mmu_lock, and cannot be taken without already
- holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
- there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
+- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
+ kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
+ cannot be taken without already holding kvm->arch.mmu_lock (typically with
+ ``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
Everything else is a leaf: no other lock is taken inside the critical
sections.