summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/apic/io_apic.c3
-rw-r--r--arch/x86/kernel/asm-offsets.c3
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event.c16
-rw-r--r--arch/x86/kernel/cpu/perf_event.h2
-rw-r--r--arch/x86/kernel/cpu/perf_event_amd_ibs.c61
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c48
-rw-r--r--arch/x86/kernel/cpu/perf_event_knc.c319
-rw-r--r--arch/x86/kernel/cpu/perf_event_p6.c127
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kernel/e820.c3
-rw-r--r--arch/x86/kernel/entry_32.S54
-rw-r--r--arch/x86/kernel/entry_64.S62
-rw-r--r--arch/x86/kernel/kgdb.c2
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/process.c65
-rw-r--r--arch/x86/kernel/process_32.c37
-rw-r--r--arch/x86/kernel/process_64.c35
-rw-r--r--arch/x86/kernel/reboot.c8
-rw-r--r--arch/x86/kernel/setup.c33
-rw-r--r--arch/x86/kernel/signal.c8
-rw-r--r--arch/x86/kernel/sys_i386_32.c40
-rw-r--r--arch/x86/kernel/uprobes.c16
-rw-r--r--arch/x86/kernel/vm86_32.c6
-rw-r--r--arch/x86/kernel/vsyscall_64.c49
30 files changed, 708 insertions, 320 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a48ea05157d..91ce48f05f9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y += time.o ioport.o ldt.o dumpstack.o nmi.o
obj-y += setup.o x86_init.o i8259.o irqinit.o jump_label.o
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-y += probe_roms.o
-obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-y += syscall_$(BITS).o
obj-$(CONFIG_X86_64) += vsyscall_64.o
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index c265593ec2c..1817fa91102 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2257,6 +2257,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
continue;
cfg = irq_cfg(irq);
+ if (!cfg)
+ continue;
+
raw_spin_lock(&desc->lock);
/*
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962e..28610822fb3 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+ BLANK();
+ DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
}
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a012..a0e067d3d96 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
ifdef CONFIG_PERF_EVENTS
obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o
endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9a7c90d80bc..93c5451bdd5 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -991,7 +991,7 @@ static struct attribute ** __cpuinit amd_l3_attrs(void)
if (attrs)
return attrs;
- n = sizeof (default_attrs) / sizeof (struct attribute *);
+ n = ARRAY_SIZE(default_attrs);
if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
n += 2;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29e87d3b284..46cbf868969 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2209,11 +2209,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
&mce_cmci_disabled
};
-static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
- __ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
- &mce_bios_cmci_threshold
-};
-
static struct device_attribute *mce_device_attrs[] = {
&dev_attr_tolerant.attr,
&dev_attr_check_interval.attr,
@@ -2222,7 +2217,6 @@ static struct device_attribute *mce_device_attrs[] = {
&dev_attr_dont_log_ce.attr,
&dev_attr_ignore_ce.attr,
&dev_attr_cmci_disabled.attr,
- &dev_attr_bios_cmci_threshold.attr,
NULL
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c4e916d7737..698b6ec12e0 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -576,12 +576,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
int err = 0;
if (shared_bank[bank]) {
-
nb = node_to_amd_nb(amd_get_nb_id(cpu));
- WARN_ON(!nb);
/* threshold descriptor already initialized on this node? */
- if (nb->bank4) {
+ if (nb && nb->bank4) {
/* yes, use it */
b = nb->bank4;
err = kobject_add(b->kobj, &dev->kobj, name);
@@ -615,8 +613,10 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
atomic_set(&b->cpus, 1);
/* nb is already initialized, see above */
- WARN_ON(nb->bank4);
- nb->bank4 = b;
+ if (nb) {
+ WARN_ON(nb->bank4);
+ nb->bank4 = b;
+ }
}
err = allocate_threshold_blocks(cpu, bank, 0,
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 915b876edd1..4a3374e61a9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -208,12 +208,14 @@ static bool check_hw_exists(void)
}
/*
- * Now write a value and read it back to see if it matches,
- * this is needed to detect certain hardware emulators (qemu/kvm)
- * that don't trap on the MSR access and always return 0s.
+ * Read the current value, change it and read it back to see if it
+ * matches, this is needed to detect certain hardware emulators
+ * (qemu/kvm) that don't trap on the MSR access and always return 0s.
*/
- val = 0xabcdUL;
reg = x86_pmu_event_addr(0);
+ if (rdmsrl_safe(reg, &val))
+ goto msr_fail;
+ val ^= 0xffffUL;
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
@@ -338,6 +340,9 @@ int x86_setup_perfctr(struct perf_event *event)
/* BTS is currently only allowed for user-mode. */
if (!attr->exclude_kernel)
return -EOPNOTSUPP;
+
+ if (!attr->exclude_guest)
+ return -EOPNOTSUPP;
}
hwc->config |= config;
@@ -380,6 +385,9 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip) {
int precise = 0;
+ if (!event->attr.exclude_guest)
+ return -EOPNOTSUPP;
+
/* Support for constant skid */
if (x86_pmu.pebs_active && !x86_pmu.pebs_broken) {
precise++;
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8b6defe7eef..271d2570029 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
int p6_pmu_init(void);
+int knc_pmu_init(void);
+
#else /* CONFIG_CPU_SUP_INTEL */
static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index eebd5ffe1bb..6336bcbd061 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
};
struct perf_ibs {
- struct pmu pmu;
- unsigned int msr;
- u64 config_mask;
- u64 cnt_mask;
- u64 enable_mask;
- u64 valid_mask;
- u64 max_period;
- unsigned long offset_mask[1];
- int offset_max;
- struct cpu_perf_ibs __percpu *pcpu;
- u64 (*get_count)(u64 config);
+ struct pmu pmu;
+ unsigned int msr;
+ u64 config_mask;
+ u64 cnt_mask;
+ u64 enable_mask;
+ u64 valid_mask;
+ u64 max_period;
+ unsigned long offset_mask[1];
+ int offset_max;
+ struct cpu_perf_ibs __percpu *pcpu;
+
+ struct attribute **format_attrs;
+ struct attribute_group format_group;
+ const struct attribute_group *attr_groups[2];
+
+ u64 (*get_count)(u64 config);
};
struct perf_ibs_data {
@@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
static void perf_ibs_read(struct perf_event *event) { }
+PMU_FORMAT_ATTR(rand_en, "config:57");
+PMU_FORMAT_ATTR(cnt_ctl, "config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+ &format_attr_rand_en.attr,
+ NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+ NULL, /* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+ NULL,
+};
+
static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
@@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
.max_period = IBS_FETCH_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSFETCH_REG_MASK },
.offset_max = MSR_AMD64_IBSFETCH_REG_COUNT,
+ .format_attrs = ibs_fetch_format_attrs,
.get_count = get_ibs_fetch_count,
};
@@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
.max_period = IBS_OP_MAX_CNT << 4,
.offset_mask = { MSR_AMD64_IBSOP_REG_MASK },
.offset_max = MSR_AMD64_IBSOP_REG_COUNT,
+ .format_attrs = ibs_op_format_attrs,
.get_count = get_ibs_op_count,
};
@@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
perf_ibs->pcpu = pcpu;
+ /* register attributes */
+ if (perf_ibs->format_attrs[0]) {
+ memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+ perf_ibs->format_group.name = "format";
+ perf_ibs->format_group.attrs = perf_ibs->format_attrs;
+
+ memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+ perf_ibs->attr_groups[0] = &perf_ibs->format_group;
+ perf_ibs->pmu.attr_groups = perf_ibs->attr_groups;
+ }
+
ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
if (ret) {
perf_ibs->pcpu = NULL;
@@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
static __init int perf_event_ibs_init(void)
{
+ struct attribute **attr = ibs_op_format_attrs;
+
if (!ibs_caps)
return -ENODEV; /* ibs not supported by the cpu */
perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
- if (ibs_caps & IBS_CAPS_OPCNT)
+
+ if (ibs_caps & IBS_CAPS_OPCNT) {
perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+ *attr++ = &format_attr_cnt_ctl.attr;
+ }
perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6bca492b854..324bb523d9d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
switch (boot_cpu_data.x86) {
case 0x6:
return p6_pmu_init();
+ case 0xb:
+ return knc_pmu_init();
case 0xf:
return p4_pmu_init();
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 99d96a4978b..3cf3d97cce3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -118,22 +118,24 @@ static void snbep_uncore_pci_disable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config |= SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config |= SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_box(struct intel_uncore_box *box)
{
struct pci_dev *pdev = box->pci_dev;
int box_ctl = uncore_pci_box_ctl(box);
- u32 config;
+ u32 config = 0;
- pci_read_config_dword(pdev, box_ctl, &config);
- config &= ~SNBEP_PMON_BOX_CTL_FRZ;
- pci_write_config_dword(pdev, box_ctl, config);
+ if (!pci_read_config_dword(pdev, box_ctl, &config)) {
+ config &= ~SNBEP_PMON_BOX_CTL_FRZ;
+ pci_write_config_dword(pdev, box_ctl, config);
+ }
}
static void snbep_uncore_pci_enable_event(struct intel_uncore_box *box, struct perf_event *event)
@@ -156,7 +158,7 @@ static u64 snbep_uncore_pci_read_counter(struct intel_uncore_box *box, struct pe
{
struct pci_dev *pdev = box->pci_dev;
struct hw_perf_event *hwc = &event->hw;
- u64 count;
+ u64 count = 0;
pci_read_config_dword(pdev, hwc->event_base, (u32 *)&count);
pci_read_config_dword(pdev, hwc->event_base + 4, (u32 *)&count + 1);
@@ -603,11 +605,12 @@ static struct pci_driver snbep_uncore_pci_driver = {
/*
* build pci bus to socket mapping
*/
-static void snbep_pci2phy_map_init(void)
+static int snbep_pci2phy_map_init(void)
{
struct pci_dev *ubox_dev = NULL;
int i, bus, nodeid;
- u32 config;
+ int err = 0;
+ u32 config = 0;
while (1) {
/* find the UBOX device */
@@ -618,10 +621,14 @@ static void snbep_pci2phy_map_init(void)
break;
bus = ubox_dev->bus->number;
/* get the Node ID of the local register */
- pci_read_config_dword(ubox_dev, 0x40, &config);
+ err = pci_read_config_dword(ubox_dev, 0x40, &config);
+ if (err)
+ break;
nodeid = config;
/* get the Node ID mapping */
- pci_read_config_dword(ubox_dev, 0x54, &config);
+ err = pci_read_config_dword(ubox_dev, 0x54, &config);
+ if (err)
+ break;
/*
* every three bits in the Node ID mapping register maps
* to a particular node.
@@ -633,7 +640,11 @@ static void snbep_pci2phy_map_init(void)
}
}
};
- return;
+
+ if (ubox_dev)
+ pci_dev_put(ubox_dev);
+
+ return err ? pcibios_err_to_errno(err) : 0;
}
/* end of Sandy Bridge-EP uncore support */
@@ -1547,7 +1558,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
struct hw_perf_event_extra *reg1 = &hwc->extra_reg;
- int port;
/* adjust the main event selector and extra register index */
if (reg1->idx % 2) {
@@ -1559,7 +1569,6 @@ void nhmex_rbox_alter_er(struct intel_uncore_box *box, struct perf_event *event)
}
/* adjust extra register config */
- port = reg1->idx / 6 + box->pmu->pmu_idx * 4;
switch (reg1->idx % 6) {
case 2:
/* shift the 8~15 bits to the 0~7 bits */
@@ -2578,9 +2587,11 @@ static int __init uncore_pci_init(void)
switch (boot_cpu_data.x86_model) {
case 45: /* Sandy Bridge-EP */
+ ret = snbep_pci2phy_map_init();
+ if (ret)
+ return ret;
pci_uncores = snbep_pci_uncores;
uncore_pci_driver = &snbep_uncore_pci_driver;
- snbep_pci2phy_map_init();
break;
default:
return 0;
@@ -2926,6 +2937,9 @@ static int __init intel_uncore_init(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return -ENODEV;
+ if (cpu_has_hypervisor)
+ return -ENODEV;
+
ret = uncore_pci_init();
if (ret)
goto fail;
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
new file mode 100644
index 00000000000..4b7731bf23a
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -0,0 +1,319 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include <asm/hardirq.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0028,
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0012,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ /* On Xeon Phi event "0" is a valid DATA_READ */
+ /* (L1 Data Cache Reads) Instruction. */
+ /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
+ /* bit will always be set in x86_pmu_hw_config(). */
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
+ [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
+ [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+ /* DATA_READ */
+ /* see note on L1 OP_READ */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
+ [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
+ [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
+ [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+ return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+ INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
+ INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
+ INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
+ INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
+ INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
+ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
+ INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+ INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+ INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
+ INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
+ INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
+ INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
+ INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
+ INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
+ INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
+ INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
+ INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
+ EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS 0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL 0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL 0x0000002f
+
+#define KNC_ENABLE_COUNTER0 0x00000001
+#define KNC_ENABLE_COUNTER1 0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+ u64 val;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+ val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ u64 val;
+
+ val = hwc->config;
+ val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static inline u64 knc_pmu_get_status(void)
+{
+ u64 status;
+
+ rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
+
+ return status;
+}
+
+static inline void knc_pmu_ack_status(u64 ack)
+{
+ wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL, ack);
+}
+
+static int knc_pmu_handle_irq(struct pt_regs *regs)
+{
+ struct perf_sample_data data;
+ struct cpu_hw_events *cpuc;
+ int handled = 0;
+ int bit, loops;
+ u64 status;
+
+ cpuc = &__get_cpu_var(cpu_hw_events);
+
+ knc_pmu_disable_all();
+
+ status = knc_pmu_get_status();
+ if (!status) {
+ knc_pmu_enable_all(0);
+ return handled;
+ }
+
+ loops = 0;
+again:
+ knc_pmu_ack_status(status);
+ if (++loops > 100) {
+ WARN_ONCE(1, "perf: irq loop stuck!\n");
+ perf_event_print_debug();
+ goto done;
+ }
+
+ inc_irq_stat(apic_perf_irqs);
+
+ for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
+ struct perf_event *event = cpuc->events[bit];
+
+ handled++;
+
+ if (!test_bit(bit, cpuc->active_mask))
+ continue;
+
+ if (!intel_pmu_save_and_restart(event))
+ continue;
+
+ perf_sample_data_init(&data, 0, event->hw.last_period);
+
+ if (perf_event_overflow(event, &data, regs))
+ x86_pmu_stop(event, 0);
+ }
+
+ /*
+ * Repeat if there is more work to be done:
+ */
+ status = knc_pmu_get_status();
+ if (status)
+ goto again;
+
+done:
+ knc_pmu_enable_all(0);
+
+ return handled;
+}
+
+
+PMU_FORMAT_ATTR(event, "config:0-7" );
+PMU_FORMAT_ATTR(umask, "config:8-15" );
+PMU_FORMAT_ATTR(edge, "config:18" );
+PMU_FORMAT_ATTR(inv, "config:23" );
+PMU_FORMAT_ATTR(cmask, "config:24-31" );
+
+static struct attribute *intel_knc_formats_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_umask.attr,
+ &format_attr_edge.attr,
+ &format_attr_inv.attr,
+ &format_attr_cmask.attr,
+ NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+ .name = "knc",
+ .handle_irq = knc_pmu_handle_irq,
+ .disable_all = knc_pmu_disable_all,
+ .enable_all = knc_pmu_enable_all,
+ .enable = knc_pmu_enable_event,
+ .disable = knc_pmu_disable_event,
+ .hw_config = x86_pmu_hw_config,
+ .schedule_events = x86_schedule_events,
+ .eventsel = MSR_KNC_EVNTSEL0,
+ .perfctr = MSR_KNC_PERFCTR0,
+ .event_map = knc_pmu_event_map,
+ .max_events = ARRAY_SIZE(knc_perfmon_event_map),
+ .apic = 1,
+ .max_period = (1ULL << 39) - 1,
+ .version = 0,
+ .num_counters = 2,
+ .cntval_bits = 40,
+ .cntval_mask = (1ULL << 40) - 1,
+ .get_event_constraints = x86_get_event_constraints,
+ .event_constraints = knc_event_constraints,
+ .format_attrs = intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+ x86_pmu = knc_pmu;
+
+ memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ return 0;
+}
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index e4dd0f7a045..7d0270bd793 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -8,13 +8,106 @@
*/
static const u64 p6_perfmon_event_map[] =
{
- [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
- [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
- [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
- [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
- [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
- [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
+ [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, /* CPU_CLK_UNHALTED */
+ [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, /* INST_RETIRED */
+ [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, /* L2_RQSTS:M:E:S:I */
+ [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, /* L2_RQSTS:I */
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, /* BR_INST_RETIRED */
+ [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, /* BUS_DRDY_CLOCKS */
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a2, /* RESOURCE_STALLS */
+
+};
+
+static __initconst u64 p6_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0x0045, /* DCU_LINES_IN */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0f29, /* L2_LD:M:E:S:I */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0f28, /* L2_IFETCH:M:E:S:I */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0x0025, /* L2_M_LINES_INM */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0043, /* DATA_MEM_REFS */
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0080, /* IFU_IFETCH */
+ [ C(RESULT_MISS) ] = 0x0085, /* ITLB_MISS */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISS_PRED_RETIRED */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
};
static u64 p6_pmu_event_map(int hw_event)
@@ -34,7 +127,7 @@ static struct event_constraint p6_event_constraints[] =
{
INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */
INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
- INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */
+ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
@@ -64,25 +157,25 @@ static void p6_pmu_enable_all(int added)
static inline void
p6_pmu_disable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val = P6_NOP_EVENT;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
-
(void)wrmsrl_safe(hwc->config_base, val);
}
static void p6_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
u64 val;
val = hwc->config;
- if (cpuc->enabled)
- val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+ /*
+ * p6 only has a global event enable, set on PerfEvtSel0
+ * We "disable" events by programming P6_NOP_EVENT
+ * and we rely on p6_pmu_enable_all() being called
+ * to actually enable the events.
+ */
(void)wrmsrl_safe(hwc->config_base, val);
}
@@ -158,5 +251,9 @@ __init int p6_pmu_init(void)
x86_pmu = p6_pmu;
+ memcpy(hw_cache_event_ids, p6_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+
return 0;
}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 966512b2cac..2e8caf03f59 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_PERFCTR0;
+ case 11:
+ return msr - MSR_KNC_PERFCTR0;
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
switch (boot_cpu_data.x86) {
case 6:
return msr - MSR_P6_EVNTSEL0;
+ case 11:
+ return msr - MSR_KNC_EVNTSEL0;
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index ed858e9e9a7..df06ade26be 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1077,6 +1077,9 @@ void __init memblock_x86_fill(void)
memblock_add(ei->addr, ei->size);
}
+ /* throw away partial pages */
+ memblock_trim_memory(PAGE_SIZE);
+
memblock_dump_all();
}
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 0750e3ba87c..88b725aa1d5 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -299,6 +299,21 @@ ENTRY(ret_from_fork)
CFI_ENDPROC
END(ret_from_fork)
+ENTRY(ret_from_kernel_thread)
+ CFI_STARTPROC
+ pushl_cfi %eax
+ call schedule_tail
+ GET_THREAD_INFO(%ebp)
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
+ movl PT_EBP(%esp),%eax
+ call *PT_EBX(%esp)
+ movl $0,PT_EAX(%esp)
+ jmp syscall_exit
+ CFI_ENDPROC
+ENDPROC(ret_from_kernel_thread)
+
/*
* Interrupt exit functions should be protected against kprobes
*/
@@ -323,8 +338,7 @@ ret_from_intr:
andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
/*
- * We can be coming here from a syscall done in the kernel space,
- * e.g. a failed kernel_execve().
+ * We can be coming here from child spawned by kernel_thread().
*/
movl PT_CS(%esp), %eax
andl $SEGMENT_RPL_MASK, %eax
@@ -616,6 +630,10 @@ work_notifysig: # deal with pending signals and
movl %esp, %eax
jne work_notifysig_v86 # returning to kernel-space or
# vm86-space
+1:
+#else
+ movl %esp, %eax
+#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
movb PT_CS(%esp), %bl
@@ -626,24 +644,15 @@ work_notifysig: # deal with pending signals and
call do_notify_resume
jmp resume_userspace
+#ifdef CONFIG_VM86
ALIGN
work_notifysig_v86:
pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
popl_cfi %ecx
movl %eax, %esp
-#else
- movl %esp, %eax
+ jmp 1b
#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
@@ -732,7 +741,6 @@ ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
PTREGSCALL2(sigaltstack)
PTREGSCALL0(sigreturn)
PTREGSCALL0(rt_sigreturn)
@@ -1015,16 +1023,6 @@ END(spurious_interrupt_bug)
*/
.popsection
-ENTRY(kernel_thread_helper)
- pushl $0 # fake return address for unwinder
- CFI_STARTPROC
- movl %edi,%eax
- call *%esi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
-
#ifdef CONFIG_XEN
/* Xen doesn't set %esp to be precisely what the normal sysenter
entrypoint expects, so fix it up before using the normal path. */
@@ -1037,7 +1035,7 @@ ENTRY(xen_sysenter_target)
ENTRY(xen_hypervisor_callback)
CFI_STARTPROC
- pushl_cfi $0
+ pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
TRACE_IRQS_OFF
@@ -1079,14 +1077,16 @@ ENTRY(xen_failsafe_callback)
2: mov 8(%esp),%es
3: mov 12(%esp),%fs
4: mov 16(%esp),%gs
+ /* EAX == 0 => Category 1 (Bad segment)
+ EAX != 0 => Category 2 (Bad IRET) */
testl %eax,%eax
popl_cfi %eax
lea 16(%esp),%esp
CFI_ADJUST_CFA_OFFSET -16
jz 5f
addl $16,%esp
- jmp iret_exc # EAX != 0 => Category 2 (Bad IRET)
-5: pushl_cfi $0 # EAX == 0 => Category 1 (Bad segment)
+ jmp iret_exc
+5: pushl_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp ret_from_exception
CFI_ENDPROC
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 44531acd9a8..b51b2c7ee51 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -554,7 +554,7 @@ ENTRY(ret_from_fork)
RESTORE_REST
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz retint_restore_args
+ jz 1f
testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
jnz int_ret_from_sys_call
@@ -562,6 +562,14 @@ ENTRY(ret_from_fork)
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
jmp ret_from_sys_call # go to the SYSRET fastpath
+1:
+ subq $REST_SKIP, %rsp # leave space for volatiles
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
+ movq %rbp, %rdi
+ call *%rbx
+ movl $0, RAX(%rsp)
+ RESTORE_REST
+ jmp int_ret_from_sys_call
CFI_ENDPROC
END(ret_from_fork)
@@ -862,7 +870,6 @@ ENTRY(stub_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
call sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
@@ -912,8 +919,7 @@ ENTRY(stub_x32_execve)
PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
- movq %rsp, %rcx
- call sys32_execve
+ call compat_sys_execve
RESTORE_TOP_OF_STACK %r11
movq %rax,RAX(%rsp)
RESTORE_REST
@@ -1318,52 +1324,6 @@ bad_gs:
jmp 2b
.previous
-ENTRY(kernel_thread_helper)
- pushq $0 # fake return address
- CFI_STARTPROC
- /*
- * Here we are in the child and the registers are set as they were
- * at kernel_thread() invocation in the parent.
- */
- call *%rsi
- # exit
- mov %eax, %edi
- call do_exit
- ud2 # padding for call trace
- CFI_ENDPROC
-END(kernel_thread_helper)
-
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- * extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- * rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
- CFI_STARTPROC
- FAKE_STACK_FRAME $0
- SAVE_ALL
- movq %rsp,%rcx
- call sys_execve
- movq %rax, RAX(%rsp)
- RESTORE_REST
- testq %rax,%rax
- je int_ret_from_sys_call
- RESTORE_ARGS
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-END(kernel_execve)
-
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
@@ -1475,7 +1435,7 @@ ENTRY(xen_failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq_cfi $0
+ pushq_cfi $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
jmp error_exit
CFI_ENDPROC
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3f61904365c..836f8322960 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
+#ifdef CONFIG_DEBUG_RODATA
char opc[BREAK_INSTR_SIZE];
+#endif /* CONFIG_DEBUG_RODATA */
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b3e5e51bc90..4180a874c76 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -247,7 +247,10 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code)
break;
case KVM_PV_REASON_PAGE_NOT_PRESENT:
/* page is swapped out by the host. */
+ rcu_irq_enter();
+ exit_idle();
kvm_async_pf_task_wait((u32)read_cr2());
+ rcu_irq_exit();
break;
case KVM_PV_REASON_PAGE_READY:
rcu_irq_enter();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index dc3567e083f..b644e1c765d 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -293,71 +293,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
}
/*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
-
- regs.si = (unsigned long) fn;
- regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
-#else
- regs.ss = __KERNEL_DS;
-#endif
-
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
- const char __user *const __user *argv,
- const char __user *const __user *envp, struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
- if (error == 0) {
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
-#endif
-
- putname(filename);
- return error;
-}
-
-/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b9ff83c7135..44e0bff38e7 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
#include <asm/switch_to.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
/*
* Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
- struct pt_regs *childregs;
+ struct pt_regs *childregs = task_pt_regs(p);
struct task_struct *tsk;
int err;
- childregs = task_pt_regs(p);
+ p->thread.sp = (unsigned long) childregs;
+ p->thread.sp0 = (unsigned long) (childregs+1);
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ p->thread.ip = (unsigned long) ret_from_kernel_thread;
+ task_user_gs(p) = __KERNEL_STACK_CANARY;
+ childregs->ds = __USER_DS;
+ childregs->es = __USER_DS;
+ childregs->fs = __KERNEL_PERCPU;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ p->fpu_counter = 0;
+ p->thread.io_bitmap_ptr = NULL;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+ return 0;
+ }
*childregs = *regs;
childregs->ax = 0;
childregs->sp = sp;
- p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
-
p->thread.ip = (unsigned long) ret_from_fork;
-
task_user_gs(p) = get_user_gs(regs);
p->fpu_counter = 0;
@@ -190,6 +207,12 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->cs = __USER_CS;
regs->ip = new_ip;
regs->sp = new_sp;
+ regs->flags = X86_EFLAGS_IF;
+ /*
+ * force it to the iret return path by making it look as if there was
+ * some work pending.
+ */
+ set_thread_flag(TIF_NOTIFY_RESUME);
}
EXPORT_SYMBOL_GPL(start_thread);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8a6d20ce197..16c6365e2b8 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
}
int copy_thread(unsigned long clone_flags, unsigned long sp,
- unsigned long unused,
+ unsigned long arg,
struct task_struct *p, struct pt_regs *regs)
{
int err;
struct pt_regs *childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *)
- (THREAD_SIZE + task_stack_page(p))) - 1;
- *childregs = *regs;
-
- childregs->ax = 0;
- if (user_mode(regs))
- childregs->sp = sp;
- else
- childregs->sp = (unsigned long)childregs;
-
+ p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+ childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.sp0 = (unsigned long) (childregs+1);
p->thread.usersp = me->thread.usersp;
-
set_tsk_thread_flag(p, TIF_FORK);
-
p->fpu_counter = 0;
p->thread.io_bitmap_ptr = NULL;
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+ if (unlikely(!regs)) {
+ /* kernel thread */
+ memset(childregs, 0, sizeof(struct pt_regs));
+ childregs->sp = (unsigned long)childregs;
+ childregs->ss = __KERNEL_DS;
+ childregs->bx = sp; /* function */
+ childregs->bp = arg;
+ childregs->orig_ax = -1;
+ childregs->cs = __KERNEL_CS | get_kernel_rpl();
+ childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+ return 0;
+ }
+ *childregs = *regs;
+
+ childregs->ax = 0;
+ childregs->sp = sp;
err = -ENOMEM;
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 52190a938b4..4e8ba39eaf0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -358,14 +358,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "VGN-Z540N"),
},
},
- { /* Handle problems with rebooting on CompuLab SBC-FITPC2 */
- .callback = set_bios_reboot,
- .ident = "CompuLab SBC-FITPC2",
- .matches = {
- DMI_MATCH(DMI_SYS_VENDOR, "CompuLab"),
- DMI_MATCH(DMI_PRODUCT_NAME, "SBC-FITPC2"),
- },
- },
{ /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
.ident = "ASUS P4S800",
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d609be046b5..ca45696f30f 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -68,6 +68,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
#include <linux/tboot.h>
+#include <linux/jiffies.h>
#include <video/edid.h>
@@ -919,8 +920,22 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
+ int i;
+ unsigned long start, end;
+ unsigned long start_pfn, end_pfn;
+
+ for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn,
+ NULL) {
+
+ end = PFN_PHYS(end_pfn);
+ if (end <= (1UL<<32))
+ continue;
+
+ start = PFN_PHYS(start_pfn);
+ max_pfn_mapped = init_memory_mapping(
+ max((1UL<<32), start), end);
+ }
+
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
@@ -1032,6 +1047,20 @@ void __init setup_arch(char **cmdline_p)
mcheck_init();
arch_init_ideal_nops();
+
+ register_refined_jiffies(CLOCK_TICK_RATE);
+
+#ifdef CONFIG_EFI
+ /* Once setup is done above, disable efi_enabled on mismatched
+ * firmware/kernel archtectures since there is no support for
+ * runtime services.
+ */
+ if (efi_enabled && IS_ENABLED(CONFIG_X86_64) != efi_64bit) {
+ pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n");
+ efi_unmap_memmap();
+ efi_enabled = 0;
+ }
+#endif
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b33144c8b30..70b27ee6118 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -824,10 +824,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
mce_notify_process();
#endif /* CONFIG_X86_64 && CONFIG_X86_MCE */
- if (thread_info_flags & _TIF_UPROBE) {
- clear_thread_flag(TIF_UPROBE);
+ if (thread_info_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
- }
/* deal with pending signal delivery */
if (thread_info_flags & _TIF_SIGPENDING)
@@ -840,10 +838,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
-#ifdef CONFIG_X86_32
- clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
-
rcu_user_enter();
}
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5fede1..00000000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
- const char *const argv[],
- const char *const envp[])
-{
- long __res;
- asm volatile ("int $0x80"
- : "=a" (__res)
- : "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
- return __res;
-}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 9538f00827a..aafa5557b39 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -651,31 +651,19 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
/*
* Skip these instructions as per the currently known x86 ISA.
- * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
+ * rep=0x66*; nop=0x90
*/
static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
int i;
for (i = 0; i < MAX_UINSN_BYTES; i++) {
- if ((auprobe->insn[i] == 0x66))
+ if (auprobe->insn[i] == 0x66)
continue;
if (auprobe->insn[i] == 0x90)
return true;
- if (i == (MAX_UINSN_BYTES - 1))
- break;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
- return true;
-
- if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
- return true;
-
- if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
- return true;
-
break;
}
return false;
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0baf2..5c9687b1bde 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
if ((trapno == 3) || (trapno == 1)) {
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
/* setting this flag forces the code in entry_32.S to
- call save_v86_state() and change the stack pointer
- to KVM86->regs32 */
- set_thread_flag(TIF_IRET);
+ the path where we call save_v86_state() and change
+ the stack pointer to KVM86->regs32 */
+ set_thread_flag(TIF_NOTIFY_RESUME);
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8d141b30904..3a3e8c9e280 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -28,7 +28,7 @@
#include <linux/jiffies.h>
#include <linux/sysctl.h>
#include <linux/topology.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/getcpu.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
vsyscall_gtod_data.sys_tz = sys_tz;
}
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
- struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
{
- struct timespec monotonic;
+ struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
- write_seqcount_begin(&vsyscall_gtod_data.seq);
+ write_seqcount_begin(&vdata->seq);
/* copy vsyscall data */
- vsyscall_gtod_data.clock.vclock_mode = clock->archdata.vclock_mode;
- vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
- vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = mult;
- vsyscall_gtod_data.clock.shift = clock->shift;
-
- vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+ vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->clock->cycle_last;
+ vdata->clock.mask = tk->clock->mask;
+ vdata->clock.mult = tk->mult;
+ vdata->clock.shift = tk->shift;
+
+ vdata->wall_time_sec = tk->xtime_sec;
+ vdata->wall_time_snsec = tk->xtime_nsec;
+
+ vdata->monotonic_time_sec = tk->xtime_sec
+ + tk->wall_to_monotonic.tv_sec;
+ vdata->monotonic_time_snsec = tk->xtime_nsec
+ + (tk->wall_to_monotonic.tv_nsec
+ << tk->shift);
+ while (vdata->monotonic_time_snsec >=
+ (((u64)NSEC_PER_SEC) << tk->shift)) {
+ vdata->monotonic_time_snsec -=
+ ((u64)NSEC_PER_SEC) << tk->shift;
+ vdata->monotonic_time_sec++;
+ }
- monotonic = timespec_add(*wall_time, *wtm);
- vsyscall_gtod_data.monotonic_time_sec = monotonic.tv_sec;
- vsyscall_gtod_data.monotonic_time_nsec = monotonic.tv_nsec;
+ vdata->wall_time_coarse.tv_sec = tk->xtime_sec;
+ vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
- vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
- vsyscall_gtod_data.monotonic_time_coarse =
- timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
+ vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse,
+ tk->wall_to_monotonic);
- write_seqcount_end(&vsyscall_gtod_data.seq);
+ write_seqcount_end(&vdata->seq);
}
static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,