From 674bfa485554156aa90ce17288712fcb568a42c3 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Mon, 18 Jul 2011 03:29:20 +0000 Subject: powerpc/44x: Kexec support for PPC440X chipsets This patch adds kexec support for PPC440 based chipsets. This work is based on the KEXEC patches for FSL BookE. The FSL BookE patch and the code flow can be found at the link below: http://patchwork.ozlabs.org/patch/49359/ Steps: 1) Invalidate all the TLB entries except the one this code is run from 2) Create a tmp mapping for our code in the other address space and jump to it 3) Invalidate the entry we used 4) Create a 1:1 mapping for 0-2GiB in blocks of 256M 5) Jump to the new 1:1 mapping and invalidate the tmp mapping I have tested these patches on Ebony and Sequoia boards, and on Virtex under QEMU. You need kexec-tools commit e8b7939b1e or newer for ppc440x support, available at: git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git Signed-off-by: Suzuki Poulose Cc: Sebastian Andrzej Siewior Signed-off-by: Josh Boyer --- arch/powerpc/include/asm/kexec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 8a33698c61b..f921eb121d3 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -2,7 +2,7 @@ #define _ASM_POWERPC_KEXEC_H #ifdef __KERNEL__ -#ifdef CONFIG_FSL_BOOKE +#if defined(CONFIG_FSL_BOOKE) || defined(CONFIG_44x) /* * On FSL-BookE we setup a 1:1 mapping which covers the first 2GiB of memory -- cgit v1.2.3 From 6a5c7be5e484bda5b2639fedf7dbe3f25c15c962 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 24 Jun 2011 09:05:22 +0000 Subject: powerpc: Override dma_get_required_mask by platform hook and ops The hook dma_get_required_mask is supposed to return the mask required by the platform to operate efficiently. The generic version of dma_get_required_mask in drivers/base/platform.c returns a mask based only on max_pfn. However, this is likely too big for iommu systems and could be too small for platforms that require a dma offset or have a secondary window at a high offset. Override the default, provide a hook in ppc_md used by pseries lpar and cell, and provide the default answer based on memblock_end_of_DRAM(), with hooks for get_dma_offset, and provide an implementation for iommu that looks at the defined table size. Converting from the end address to the required bit mask is based on the generic implementation. The need for this was discovered when the qla2xxx driver switched to 64 bit dma then reverted to 32 bit when dma_get_required_mask said 32 bits was sufficient.
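For illustration, a minimal sketch of how the default answer can be derived from the top of RAM, modelled on the generic fls-based conversion described above; the helpers named (memblock_end_of_DRAM, get_dma_offset) come from the commit message, but the body is an assumption, not the literal patch:

static u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 end, mask;

	/* Highest address a direct-mapped device must reach: top of RAM
	 * plus any platform DMA offset. */
	end = memblock_end_of_DRAM() + get_dma_offset(dev);

	/* Convert the end address into a bit mask, mirroring the generic
	 * implementation: highest set bit, then fill everything below it. */
	mask = 1ULL << (fls64(end) - 1);
	mask += mask - 1;

	return mask;
}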
Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/dma-mapping.h | 3 +++ arch/powerpc/include/asm/machdep.h | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index dd70fac57ec..8135e66a4bb 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -20,6 +20,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) +#define ARCH_HAS_DMA_GET_REQUIRED_MASK + /* Some dma direct funcs must be visible for use in other dma_ops */ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag); @@ -69,6 +71,7 @@ static inline unsigned long device_to_mask(struct device *dev) */ #ifdef CONFIG_PPC64 extern struct dma_map_ops dma_iommu_ops; +extern u64 dma_iommu_get_required_mask(struct device *dev); #endif extern struct dma_map_ops dma_direct_ops; diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 47cacddb14c..58fc2162301 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -85,8 +85,9 @@ struct machdep_calls { void (*pci_dma_dev_setup)(struct pci_dev *dev); void (*pci_dma_bus_setup)(struct pci_bus *bus); - /* Platform set_dma_mask override */ + /* Platform set_dma_mask and dma_get_required_mask overrides */ int (*dma_set_mask)(struct device *dev, u64 dma_mask); + u64 (*dma_get_required_mask)(struct device *dev); int (*probe)(void); void (*setup_arch)(void); /* Optional, may be NULL */ -- cgit v1.2.3 From d24f9c6999eacd3a7bc2b289e49fcb2bf2fafef2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 24 Jun 2011 09:05:24 +0000 Subject: powerpc: Use the newly added get_required_mask dma_map_ops hook Now that the generic code has dma_map_ops set, instead of having a messy ifdef & if block in the base dma_get_required_mask hook push the computation into the dma ops. If the ops fails to set the get_required_mask hook default to the width of dma_addr_t. This also corrects ibmbus ibmebus_dma_supported to require a 64 bit mask. I doubt anything is checking or setting the dma mask on that bus. 
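A rough sketch of the dispatch this enables, with the platform override checked first, then the dma_map_ops hook, and the width of dma_addr_t as the fallback (flow inferred from the description above; exact body is an assumption):

u64 dma_get_required_mask(struct device *dev)
{
	struct dma_map_ops *ops = get_dma_ops(dev);

	/* Platform override (e.g. pseries LPAR, cell) wins. */
	if (ppc_md.dma_get_required_mask)
		return ppc_md.dma_get_required_mask(dev);

	/* Otherwise ask the dma_map_ops in use for this device. */
	if (ops && ops->get_required_mask)
		return ops->get_required_mask(dev);

	/* Fall back to the width of dma_addr_t. */
	return DMA_BIT_MASK(8 * sizeof(dma_addr_t));
}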
Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/device.h | 2 ++ arch/powerpc/include/asm/dma-mapping.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 16d25c0974b..d57c08acedf 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -37,4 +37,6 @@ struct pdev_archdata { u64 dma_mask; }; +#define ARCH_HAS_DMA_GET_REQUIRED_MASK + #endif /* _ASM_POWERPC_DEVICE_H */ diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8135e66a4bb..dd70fac57ec 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -20,8 +20,6 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -#define ARCH_HAS_DMA_GET_REQUIRED_MASK - /* Some dma direct funcs must be visible for use in other dma_ops */ extern void *dma_direct_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag); @@ -71,7 +69,6 @@ static inline unsigned long device_to_mask(struct device *dev) */ #ifdef CONFIG_PPC64 extern struct dma_map_ops dma_iommu_ops; -extern u64 dma_iommu_get_required_mask(struct device *dev); #endif extern struct dma_map_ops dma_direct_ops; -- cgit v1.2.3 From 41151e77a4d96ea138cede6d84c955aa4769ce74 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Tue, 28 Jun 2011 09:54:48 +0000 Subject: powerpc: Hugetlb for BookE Enable hugepages on Freescale BookE processors. This allows the kernel to use huge TLB entries to map pages, which can greatly reduce the number of TLB misses and the amount of TLB thrashing experienced by applications with large memory footprints. Care should be taken when using this on FSL processors, as the number of large TLB entries supported by the core is low (16-64) on current processors. The supported set of hugepage sizes include 4m, 16m, 64m, 256m, and 1g. Page sizes larger than the max zone size are called "gigantic" pages and must be allocated on the command line (and cannot be deallocated). This is currently only fully implemented for Freescale 32-bit BookE processors, but there is some infrastructure in the code for 64-bit BooKE. 
Signed-off-by: Becky Bruce Signed-off-by: David Gibson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hugetlb.h | 63 +++++++++++++++++++++++++++++++++-- arch/powerpc/include/asm/mmu-book3e.h | 7 ++++ arch/powerpc/include/asm/mmu-hash64.h | 3 +- arch/powerpc/include/asm/mmu.h | 18 +++++----- arch/powerpc/include/asm/page.h | 31 ++++++++++++++++- arch/powerpc/include/asm/page_64.h | 11 ------ arch/powerpc/include/asm/pte-book3e.h | 3 ++ 7 files changed, 112 insertions(+), 24 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 5856a66ab40..86004930a78 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -1,15 +1,60 @@ #ifndef _ASM_POWERPC_HUGETLB_H #define _ASM_POWERPC_HUGETLB_H +#ifdef CONFIG_HUGETLB_PAGE #include +extern struct kmem_cache *hugepte_cache; +extern void __init reserve_hugetlb_gpages(void); + +static inline pte_t *hugepd_page(hugepd_t hpd) +{ + BUG_ON(!hugepd_ok(hpd)); + return (pte_t *)((hpd.pd & ~HUGEPD_SHIFT_MASK) | PD_HUGE); +} + +static inline unsigned int hugepd_shift(hugepd_t hpd) +{ + return hpd.pd & HUGEPD_SHIFT_MASK; +} + +static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr, + unsigned pdshift) +{ + /* + * On 32-bit, we have multiple higher-level table entries that point to + * the same hugepte. Just use the first one since they're all + * identical. So for that case, idx=0. + */ + unsigned long idx = 0; + + pte_t *dir = hugepd_page(*hpdp); +#ifdef CONFIG_PPC64 + idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(*hpdp); +#endif + + return dir + idx; +} + pte_t *huge_pte_offset_and_shift(struct mm_struct *mm, unsigned long addr, unsigned *shift); void flush_dcache_icache_hugepage(struct page *page); +#if defined(CONFIG_PPC_MM_SLICES) || defined(CONFIG_PPC_SUBPAGE_PROT) int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len); +#else +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return 0; +} +#endif + +void book3e_hugetlb_preload(struct mm_struct *mm, unsigned long ea, pte_t pte); +void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr); void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, @@ -50,8 +95,11 @@ static inline void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { - unsigned long old = pte_update(mm, addr, ptep, ~0UL, 1); - return __pte(old); +#ifdef CONFIG_PPC64 + return __pte(pte_update(mm, addr, ptep, ~0UL, 1)); +#else + return __pte(pte_update(ptep, ~0UL, 0)); +#endif } static inline void huge_ptep_clear_flush(struct vm_area_struct *vma, @@ -93,4 +141,15 @@ static inline void arch_release_hugepage(struct page *page) { } +#else /* ! 
CONFIG_HUGETLB_PAGE */ +static inline void reserve_hugetlb_gpages(void) +{ + pr_err("Cannot reserve gpages without hugetlb enabled\n"); +} +static inline void flush_hugetlb_page(struct vm_area_struct *vma, + unsigned long vmaddr) +{ +} +#endif + #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 3ea0f9a259d..0260ea5ec3c 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -66,6 +66,7 @@ #define MAS2_M 0x00000004 #define MAS2_G 0x00000002 #define MAS2_E 0x00000001 +#define MAS2_WIMGE_MASK 0x0000001f #define MAS2_EPN_MASK(size) (~0 << (size + 10)) #define MAS2_VAL(addr, size, flags) ((addr) & MAS2_EPN_MASK(size) | (flags)) @@ -80,6 +81,7 @@ #define MAS3_SW 0x00000004 #define MAS3_UR 0x00000002 #define MAS3_SR 0x00000001 +#define MAS3_BAP_MASK 0x0000003f #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 @@ -212,6 +214,11 @@ typedef struct { unsigned int id; unsigned int active; unsigned long vdso_base; +#ifdef CONFIG_PPC_MM_SLICES + u64 low_slices_psize; /* SLB page size encodings */ + u64 high_slices_psize; /* 4 bits per slice for now */ + u16 user_psize; /* page size index */ +#endif } mm_context_t; /* Page size definitions, common between 32 and 64-bit diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index b445e0af4c2..db645ec842b 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -262,8 +262,7 @@ extern void hash_failure_debug(unsigned long ea, unsigned long access, extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, unsigned long pstart, unsigned long prot, int psize, int ssize); -extern void add_gpage(unsigned long addr, unsigned long page_size, - unsigned long number_of_pages); +extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); extern void hpte_init_native(void); diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 698b3063868..f0145522cfb 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -175,14 +175,16 @@ extern u64 ppc64_rma_size; #define MMU_PAGE_64K_AP 3 /* "Admixed pages" (hash64 only) */ #define MMU_PAGE_256K 4 #define MMU_PAGE_1M 5 -#define MMU_PAGE_8M 6 -#define MMU_PAGE_16M 7 -#define MMU_PAGE_256M 8 -#define MMU_PAGE_1G 9 -#define MMU_PAGE_16G 10 -#define MMU_PAGE_64G 11 -#define MMU_PAGE_COUNT 12 - +#define MMU_PAGE_4M 6 +#define MMU_PAGE_8M 7 +#define MMU_PAGE_16M 8 +#define MMU_PAGE_64M 9 +#define MMU_PAGE_256M 10 +#define MMU_PAGE_1G 11 +#define MMU_PAGE_16G 12 +#define MMU_PAGE_64G 13 + +#define MMU_PAGE_COUNT 14 #if defined(CONFIG_PPC_STD_MMU_64) /* 64-bit classic hash table MMU */ diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index 2cd664ef0a5..dd9c4fd038e 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -36,6 +36,18 @@ #define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#ifndef __ASSEMBLY__ +#ifdef CONFIG_HUGETLB_PAGE +extern unsigned int HPAGE_SHIFT; +#else +#define HPAGE_SHIFT PAGE_SHIFT +#endif +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) +#endif + /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ #define __HAVE_ARCH_GATE_AREA 1 @@ -158,6 
+170,24 @@ extern phys_addr_t kernstart_addr; #define is_kernel_addr(x) ((x) >= PAGE_OFFSET) #endif +/* + * Use the top bit of the higher-level page table entries to indicate whether + * the entries we point to contain hugepages. This works because we know that + * the page tables live in kernel space. If we ever decide to support having + * page tables at arbitrary addresses, this breaks and will have to change. + */ +#ifdef CONFIG_PPC64 +#define PD_HUGE 0x8000000000000000 +#else +#define PD_HUGE 0x80000000 +#endif + +/* + * Some number of bits at the level of the page table that points to + * a hugepte are used to encode the size. This masks those bits. + */ +#define HUGEPD_SHIFT_MASK 0x3f + #ifndef __ASSEMBLY__ #undef STRICT_MM_TYPECHECKS @@ -243,7 +273,6 @@ typedef unsigned long pgprot_t; #endif typedef struct { signed long pd; } hugepd_t; -#define HUGEPD_SHIFT_MASK 0x3f #ifdef CONFIG_HUGETLB_PAGE static inline int hugepd_ok(hugepd_t hpd) diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h index 9356262fd3c..fb40ede6bc0 100644 --- a/arch/powerpc/include/asm/page_64.h +++ b/arch/powerpc/include/asm/page_64.h @@ -64,17 +64,6 @@ extern void copy_page(void *to, void *from); /* Log 2 of page table size */ extern u64 ppc64_pft_size; -/* Large pages size */ -#ifdef CONFIG_HUGETLB_PAGE -extern unsigned int HPAGE_SHIFT; -#else -#define HPAGE_SHIFT PAGE_SHIFT -#endif -#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) -#define HPAGE_MASK (~(HPAGE_SIZE - 1)) -#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1) - #endif /* __ASSEMBLY__ */ #ifdef CONFIG_PPC_MM_SLICES diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h index 082d515930a..0156702ba24 100644 --- a/arch/powerpc/include/asm/pte-book3e.h +++ b/arch/powerpc/include/asm/pte-book3e.h @@ -72,6 +72,9 @@ #define PTE_RPN_SHIFT (24) #endif +#define PTE_WIMGE_SHIFT (19) +#define PTE_BAP_SHIFT (2) + /* On 32-bit, we never clear the top part of the PTE */ #ifdef CONFIG_PPC32 #define _PTE_NONE_MASK 0xffffffff00000000ULL -- cgit v1.2.3 From 14b9247019432fc25e606b78262eb16a4a33b8ed Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Fri, 8 Jul 2011 11:12:42 +0000 Subject: powerpc/mpic: Add support for discontiguous cores There is one place in the MPIC driver that assumes that the cores are numbered from 0 to n-1. However, this is not true if the CPUs are not numbered sequentially. This can happen on a eight-core SOC where cores two and three are removed in the device tree. So instead of blindly looping, we iterate over the discovered CPUs and use the SMP ID as the index. This means that we no longer ask the MPIC how many CPUs there are, so we also delete mpic->num_cpus. We also catch if the number of CPUs in the SOC exceeds the number that the MPIC supports. This should never happen, of course, but it's good to be sure. 
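A minimal sketch of the loop shape this implies, iterating the CPUs Linux actually discovered and using each one's hardware ID as the MPIC index; the register programming is elided and the MPIC_MAX_CPUS bound is an assumption used only to show the range check:

	int cpu;

	for_each_present_cpu(cpu) {
		int hw_id = get_hard_smp_processor_id(cpu);

		/* The SoC may expose fewer per-CPU register sets than the
		 * MPIC architecture allows; reject anything out of range. */
		if (hw_id >= MPIC_MAX_CPUS) {
			printk(KERN_ERR "%s: cpu %d (hw id %d) exceeds MPIC limit\n",
			       __func__, cpu, hw_id);
			continue;
		}

		/* ... program the per-CPU MPIC registers for hw_id ... */
	}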
Signed-off-by: Timur Tabi Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/mpic.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index df18989e78d..e6fae49e0b7 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -273,8 +273,6 @@ struct mpic unsigned int irq_count; /* Number of sources */ unsigned int num_sources; - /* Number of CPUs */ - unsigned int num_cpus; /* default senses array */ unsigned char *senses; unsigned int senses_count; -- cgit v1.2.3 From 6c493685f1b209dd4ae41eb52c818cf12da20def Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Mon, 25 Jul 2011 07:54:50 +0000 Subject: powerpc/nvram: Add compression to fit more oops output into NVRAM Capture more than twice as much text from the printk buffer, and compress it to fit it in the lnx,oops-log NVRAM partition. You can view the compressed text using the new (as of July 20) --unzip option of the nvram command in the powerpc-utils package. [BenH: Added select of ZLIB_DEFLATE] Signed-off-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 58625d1e780..41f69ae79d4 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -249,10 +249,12 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #define ERR_FLAG_ALREADY_LOGGED 0x0 #define ERR_FLAG_BOOT 0x1 /* log was pulled from NVRAM on boot */ #define ERR_TYPE_RTAS_LOG 0x2 /* from rtas event-scan */ -#define ERR_TYPE_KERNEL_PANIC 0x4 /* from panic() */ +#define ERR_TYPE_KERNEL_PANIC 0x4 /* from die()/panic() */ +#define ERR_TYPE_KERNEL_PANIC_GZ 0x8 /* ditto, compressed */ /* All the types and not flags */ -#define ERR_TYPE_MASK (ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC) +#define ERR_TYPE_MASK \ + (ERR_TYPE_RTAS_LOG | ERR_TYPE_KERNEL_PANIC | ERR_TYPE_KERNEL_PANIC_GZ) #define RTAS_DEBUG KERN_DEBUG "RTAS: " -- cgit v1.2.3 From c26afe9e8591f306d79aab8071f1d34e4f60b700 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 31 Aug 2011 06:32:26 +0000 Subject: powerpc/ps3: Add gelic udbg driver Add a new udbg driver for the PS3 gelic Ethernet device. This driver shares only a few structure and constant definitions with the gelic Ethernet device driver, so it is implemented as a stand-alone driver with no dependencies on the gelic Ethernet device driver.
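For context, a udbg backend on powerpc is just a set of function pointers; a minimal sketch of how such a driver could hook in is below. Only udbg_putc/udbg_getc and udbg_init_ps3gelic come from the kernel sources; the transmit helper and its behaviour are hypothetical.

/* Hypothetical low-level transmit: place one byte into a raw gelic
 * Ethernet debug frame and kick the device. */
static void gelic_debug_putc(char c)
{
	/* ... build and flush a debug frame via the gelic descriptors ... */
}

void __init udbg_init_ps3gelic(void)
{
	udbg_putc = gelic_debug_putc;
	udbg_getc = NULL;	/* output-only console */
}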
Signed-off-by: Hector Martin Signed-off-by: Andre Heider Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/udbg.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 93e05d1b34b..7cf796fa03f 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -54,6 +54,7 @@ extern void __init udbg_init_40x_realmode(void); extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); +extern void __init udbg_init_ps3gelic(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ -- cgit v1.2.3 From a200d8e44649de2cbb39de95f42ad4ef5dc8dc22 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 24 Jul 2011 16:33:12 +0000 Subject: powerpc/numa: Enable SD_WAKE_AFFINE in node definition When chasing a performance issue on ppc64, I noticed tasks communicating via a pipe would often end up on different nodes. It turns out SD_WAKE_AFFINE is not set in our node defition. Commit 9fcd18c9e63e (sched: re-tune balancing) enabled SD_WAKE_AFFINE in the node definition for x86 and we need a similar change for ppc64. I used lmbench lat_ctx and perf bench pipe to verify this fix. Each benchmark was run 10 times and the average taken. lmbench lat_ctx: before: 66565 ops/sec after: 204700 ops/sec 3.1x faster perf bench pipe: before: 5.6570 usecs after: 1.3470 usecs 4.2x faster Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 7ef0d90defc..6a7e7251cc0 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -73,7 +73,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ - | 0*SD_WAKE_AFFINE \ + | 1*SD_WAKE_AFFINE \ | 0*SD_PREFER_LOCAL \ | 0*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ -- cgit v1.2.3 From d4761ad2ef18ec2c9a0037d6649c0afc4a7b907d Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 24 Jul 2011 16:33:14 +0000 Subject: powerpc/numa: Increase SD_NODES_PER_DOMAIN to 32. The largest POWER7 boxes have 32 nodes. SD_NODES_PER_DOMAIN groups nodes into chunks of 16 and adds a global balancing domain (SD_ALLNODES) above it. If we bump SD_NODES_PER_DOMAIN to 32, then we avoid this extra level of balancing on our largest boxes. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 6a7e7251cc0..d1c1d312f80 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -28,6 +28,12 @@ struct device_node; */ #define RECLAIM_DISTANCE 10 +/* + * Avoid creating an extra level of balancing (SD_ALLNODES) on the largest + * POWER7 boxes which have a maximum of 32 nodes. 
+ */ +#define SD_NODES_PER_DOMAIN 32 + #include static inline int cpu_to_node(int cpu) -- cgit v1.2.3 From 7bebcf0925f09224393a8992af706fa39aa10395 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 24 Jul 2011 16:33:15 +0000 Subject: powerpc/numa: Disable NEWIDLE balancing at node level On big POWER7 boxes we see large amounts of CPU time in system processes like workqueue and watchdog kernel threads. We currently rebalance the entire machine each time a task goes idle and this is very expensive on large machines. Disable newidle balancing at the node level and rely on the scheduler tick to rebalance across nodes. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index d1c1d312f80..13efa4d95e7 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -75,7 +75,7 @@ static inline int pcibus_to_node(struct pci_bus *bus) .forkexec_idx = 0, \ \ .flags = 1*SD_LOAD_BALANCE \ - | 1*SD_BALANCE_NEWIDLE \ + | 0*SD_BALANCE_NEWIDLE \ | 1*SD_BALANCE_EXEC \ | 1*SD_BALANCE_FORK \ | 0*SD_BALANCE_WAKE \ -- cgit v1.2.3 From e377bc5d49fdbcb5f0e559b644d806a15454d407 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 24 Jul 2011 16:33:16 +0000 Subject: powerpc/numa: Remove duplicate RECLAIM_DISTANCE definition We have two identical definitions of RECLAIM_DISTANCE, looks like the patch got applied twice. Remove one. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/topology.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 13efa4d95e7..1e104af0848 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -18,16 +18,6 @@ struct device_node; */ #define RECLAIM_DISTANCE 10 -/* - * Before going off node we want the VM to try and reclaim from the local - * node. It does this if the remote distance is larger than RECLAIM_DISTANCE. - * With the default REMOTE_DISTANCE of 20 and the default RECLAIM_DISTANCE of - * 20, we never reclaim and go off node straight away. - * - * To fix this we choose a smaller value of RECLAIM_DISTANCE. - */ -#define RECLAIM_DISTANCE 10 - /* * Avoid creating an extra level of balancing (SD_ALLNODES) on the largest * POWER7 boxes which have a maximum of 32 nodes. -- cgit v1.2.3 From a11940978bd598e65996b4f807cf4904793f7025 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:24 +0000 Subject: powerpc: Fix oops when echoing bad values to /sys/devices/system/memory/probe If we echo an address the hypervisor doesn't like to /sys/devices/system/memory/probe we oops the box: # echo 0x10000000000 > /sys/devices/system/memory/probe kernel BUG at arch/powerpc/mm/hash_utils_64.c:541! The backtrace is: create_section_mapping arch_add_memory add_memory memory_probe_store sysdev_class_store sysfs_write_file vfs_write SyS_write In create_section_mapping we BUG if htab_bolt_mapping returned an error. A better approach is to return an error which will propagate back to userspace. 
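A sketch of the reworked hook under that approach, consistent with the prototype change below but not necessarily the literal fix: create_section_mapping() passes htab_bolt_mapping()'s result up instead of BUGing, so arch_add_memory()/add_memory() can hand the failure back to the writer of the sysfs file.

int create_section_mapping(unsigned long start, unsigned long end)
{
	/* Propagate the hypervisor's refusal instead of BUG()ing. */
	return htab_bolt_mapping(start, end, __pa(start),
				 pgprot_val(PAGE_KERNEL), mmu_linear_psize,
				 mmu_kernel_ssize);
}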
Rerunning the test with this patch applied: # echo 0x10000000000 > /sys/devices/system/memory/probe -bash: echo: write error: Invalid argument Signed-off-by: Anton Blanchard Cc: stable@kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/sparsemem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index 54a47ea2c3a..0c5fa314561 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -16,7 +16,7 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern void create_section_mapping(unsigned long start, unsigned long end); +extern int create_section_mapping(unsigned long start, unsigned long end); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_NUMA extern int hot_add_scn_to_nid(unsigned long scn_addr); -- cgit v1.2.3 From fb82b83970a32263698e54a8779d2ce88cd3b060 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:49 +0000 Subject: powerpc/smp: More generic support for "soft hotplug" This adds more generic support for doing CPU hotplug with a simple idle loop and no actual reset of the processors. The generic smp_generic_kick_cpu() does the hotplug bringup trick if the PACA shows that the CPU has already been started at boot and we provide an accessor for the CPU state. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/smp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h index 15a70b7f638..adba970ce91 100644 --- a/arch/powerpc/include/asm/smp.h +++ b/arch/powerpc/include/asm/smp.h @@ -65,6 +65,7 @@ int generic_cpu_disable(void); void generic_cpu_die(unsigned int cpu); void generic_mach_cpu_die(void); void generic_set_cpu_dead(unsigned int cpu); +int generic_check_cpu_restart(unsigned int cpu); #endif #ifdef CONFIG_PPC64 -- cgit v1.2.3 From 27f4488872d9ef2a4b9aa2be58fb0789d6c0ba84 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 18:27:58 +0000 Subject: powerpc/powernv: Add OPAL takeover from PowerVM On machines supporting the OPAL firmware version 1, the system is initially booted under pHyp. We then use a special hypercall to verify if OPAL is available and if it is, we then trigger a "takeover" which disables pHyp and loads the OPAL runtime firmware, giving control to the kernel in hypervisor mode. This patch add the necessary code to detect that the OPAL takeover capability is present when running under PowerVM (aka pHyp) and perform said takeover to get hypervisor control of the processor. To perform the takeover, we must first use RTAS (within Open Firmware runtime environment) to start all processors & threads, in order to give control to OPAL on all of them. We then call the takeover hypercall on everybody, OPAL will re-enter the kernel main entry point passing it a flat device-tree. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 50 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 arch/powerpc/include/asm/opal.h (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h new file mode 100644 index 00000000000..ecdb283f8b7 --- /dev/null +++ b/arch/powerpc/include/asm/opal.h @@ -0,0 +1,50 @@ +/* + * PowerNV OPAL definitions. 
+ * + * Copyright 2011 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef __OPAL_H +#define __OPAL_H + +/****** Takeover interface ********/ + +/* PAPR H-Call used to querty the HAL existence and/or instanciate + * it from within pHyp (tech preview only). + * + * This is exclusively used in prom_init.c + */ + +#ifndef __ASSEMBLY__ + +struct opal_takeover_args { + u64 k_image; /* r4 */ + u64 k_size; /* r5 */ + u64 k_entry; /* r6 */ + u64 k_entry2; /* r7 */ + u64 hal_addr; /* r8 */ + u64 rd_image; /* r9 */ + u64 rd_size; /* r10 */ + u64 rd_loc; /* r11 */ +}; + +extern long opal_query_takeover(u64 *hal_size, u64 *hal_align); + +extern long opal_do_takeover(struct opal_takeover_args *args); + +extern int opal_enter_rtas(struct rtas_args *args, + unsigned long data, + unsigned long entry); + + +#endif /* __ASSEMBLY__ */ + +/****** OPAL APIs ******/ + + +#endif /* __OPAL_H */ -- cgit v1.2.3 From 14a43e69ed257a1fadadf9fea2c05adb1686419f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:57 +0000 Subject: powerpc/powernv: Basic support for OPAL Add definition of OPAL interfaces along with the wrappers to call into OPAL runtime and the early device-tree parsing hook to locate the OPAL runtime firmware. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/firmware.h | 10 + arch/powerpc/include/asm/opal.h | 382 +++++++++++++++++++++++++++++++++++- 2 files changed, 391 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 3a6c586c4e4..14db29b18d0 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -48,6 +48,8 @@ #define FW_FEATURE_CMO ASM_CONST(0x0000000002000000) #define FW_FEATURE_VPHN ASM_CONST(0x0000000004000000) #define FW_FEATURE_XCMO ASM_CONST(0x0000000008000000) +#define FW_FEATURE_OPAL ASM_CONST(0x0000000010000000) +#define FW_FEATURE_OPALv2 ASM_CONST(0x0000000020000000) #ifndef __ASSEMBLY__ @@ -65,6 +67,8 @@ enum { FW_FEATURE_PSERIES_ALWAYS = 0, FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES | FW_FEATURE_LPAR, + FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2, + FW_FEATURE_POWERNV_ALWAYS = 0, FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1, FW_FEATURE_CELLEB_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_BEAT, @@ -78,6 +82,9 @@ enum { #ifdef CONFIG_PPC_ISERIES FW_FEATURE_ISERIES_POSSIBLE | #endif +#ifdef CONFIG_PPC_POWERNV + FW_FEATURE_POWERNV_POSSIBLE | +#endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_POSSIBLE | #endif @@ -95,6 +102,9 @@ enum { #ifdef CONFIG_PPC_ISERIES FW_FEATURE_ISERIES_ALWAYS & #endif +#ifdef CONFIG_PPC_POWERNV + FW_FEATURE_POWERNV_ALWAYS & +#endif #ifdef CONFIG_PPC_PS3 FW_FEATURE_PS3_ALWAYS & #endif diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ecdb283f8b7..c7a3202d10a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -37,14 +37,394 @@ extern long opal_query_takeover(u64 *hal_size, u64 *hal_align); extern long opal_do_takeover(struct opal_takeover_args *args); +struct rtas_args; extern int opal_enter_rtas(struct rtas_args 
*args, unsigned long data, unsigned long entry); - #endif /* __ASSEMBLY__ */ /****** OPAL APIs ******/ +/* Return codes */ +#define OPAL_SUCCESS 0 +#define OPAL_PARAMETER -1 +#define OPAL_BUSY -2 +#define OPAL_PARTIAL -3 +#define OPAL_CONSTRAINED -4 +#define OPAL_CLOSED -5 +#define OPAL_HARDWARE -6 +#define OPAL_UNSUPPORTED -7 +#define OPAL_PERMISSION -8 +#define OPAL_NO_MEM -9 +#define OPAL_RESOURCE -10 +#define OPAL_INTERNAL_ERROR -11 +#define OPAL_BUSY_EVENT -12 +#define OPAL_HARDWARE_FROZEN -13 + +/* API Tokens (in r0) */ +#define OPAL_CONSOLE_WRITE 1 +#define OPAL_CONSOLE_READ 2 +#define OPAL_RTC_READ 3 +#define OPAL_RTC_WRITE 4 +#define OPAL_CEC_POWER_DOWN 5 +#define OPAL_CEC_REBOOT 6 +#define OPAL_READ_NVRAM 7 +#define OPAL_WRITE_NVRAM 8 +#define OPAL_HANDLE_INTERRUPT 9 +#define OPAL_POLL_EVENTS 10 +#define OPAL_PCI_SET_HUB_TCE_MEMORY 11 +#define OPAL_PCI_SET_PHB_TCE_MEMORY 12 +#define OPAL_PCI_CONFIG_READ_BYTE 13 +#define OPAL_PCI_CONFIG_READ_HALF_WORD 14 +#define OPAL_PCI_CONFIG_READ_WORD 15 +#define OPAL_PCI_CONFIG_WRITE_BYTE 16 +#define OPAL_PCI_CONFIG_WRITE_HALF_WORD 17 +#define OPAL_PCI_CONFIG_WRITE_WORD 18 +#define OPAL_SET_XIVE 19 +#define OPAL_GET_XIVE 20 +#define OPAL_GET_COMPLETION_TOKEN_STATUS 21 /* obsolete */ +#define OPAL_REGISTER_OPAL_EXCEPTION_HANDLER 22 +#define OPAL_PCI_EEH_FREEZE_STATUS 23 +#define OPAL_PCI_SHPC 24 +#define OPAL_CONSOLE_WRITE_BUFFER_SPACE 25 +#define OPAL_PCI_EEH_FREEZE_CLEAR 26 +#define OPAL_PCI_PHB_MMIO_ENABLE 27 +#define OPAL_PCI_SET_PHB_MEM_WINDOW 28 +#define OPAL_PCI_MAP_PE_MMIO_WINDOW 29 +#define OPAL_PCI_SET_PHB_TABLE_MEMORY 30 +#define OPAL_PCI_SET_PE 31 +#define OPAL_PCI_SET_PELTV 32 +#define OPAL_PCI_SET_MVE 33 +#define OPAL_PCI_SET_MVE_ENABLE 34 +#define OPAL_PCI_GET_XIVE_REISSUE 35 +#define OPAL_PCI_SET_XIVE_REISSUE 36 +#define OPAL_PCI_SET_XIVE_PE 37 +#define OPAL_GET_XIVE_SOURCE 38 +#define OPAL_GET_MSI_32 39 +#define OPAL_GET_MSI_64 40 +#define OPAL_START_CPU 41 +#define OPAL_QUERY_CPU_STATUS 42 +#define OPAL_WRITE_OPPANEL 43 +#define OPAL_PCI_MAP_PE_DMA_WINDOW 44 +#define OPAL_PCI_MAP_PE_DMA_WINDOW_REAL 45 +#define OPAL_PCI_RESET 49 + +#ifndef __ASSEMBLY__ + +/* Other enums */ +enum OpalVendorApiTokens { + OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 +}; +enum OpalFreezeState { + OPAL_EEH_STOPPED_NOT_FROZEN = 0, + OPAL_EEH_STOPPED_MMIO_FREEZE = 1, + OPAL_EEH_STOPPED_DMA_FREEZE = 2, + OPAL_EEH_STOPPED_MMIO_DMA_FREEZE = 3, + OPAL_EEH_STOPPED_RESET = 4, + OPAL_EEH_STOPPED_TEMP_UNAVAIL = 5, + OPAL_EEH_STOPPED_PERM_UNAVAIL = 6 +}; +enum OpalEehFreezeActionToken { + OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO = 1, + OPAL_EEH_ACTION_CLEAR_FREEZE_DMA = 2, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL = 3 +}; +enum OpalPciStatusToken { + OPAL_EEH_PHB_NO_ERROR = 0, + OPAL_EEH_PHB_FATAL = 1, + OPAL_EEH_PHB_RECOVERABLE = 2, + OPAL_EEH_PHB_BUS_ERROR = 3, + OPAL_EEH_PCI_NO_DEVSEL = 4, + OPAL_EEH_PCI_TA = 5, + OPAL_EEH_PCIEX_UR = 6, + OPAL_EEH_PCIEX_CA = 7, + OPAL_EEH_PCI_MMIO_ERROR = 8, + OPAL_EEH_PCI_DMA_ERROR = 9 +}; +enum OpalShpcAction { + OPAL_SHPC_GET_LINK_STATE = 0, + OPAL_SHPC_GET_SLOT_STATE = 1 +}; +enum OpalShpcLinkState { + OPAL_SHPC_LINK_DOWN = 0, + OPAL_SHPC_LINK_UP = 1 +}; +enum OpalMmioWindowType { + OPAL_M32_WINDOW_TYPE = 1, + OPAL_M64_WINDOW_TYPE = 2, + OPAL_IO_WINDOW_TYPE = 3 +}; +enum OpalShpcSlotState { + OPAL_SHPC_DEV_NOT_PRESENT = 0, + OPAL_SHPC_DEV_PRESENT = 1 +}; +enum OpalExceptionHandler { + OPAL_MACHINE_CHECK_HANDLER = 1, + OPAL_HYPERVISOR_MAINTENANCE_HANDLER = 2, + OPAL_SOFTPATCH_HANDLER = 3 +}; +enum 
OpalPendingState { + OPAL_EVENT_OPAL_INTERNAL = 0x1, + OPAL_EVENT_NVRAM = 0x2, + OPAL_EVENT_RTC = 0x4, + OPAL_EVENT_CONSOLE_OUTPUT = 0x8, + OPAL_EVENT_CONSOLE_INPUT = 0x10 +}; + +/* Machine check related definitions */ +enum OpalMCE_Version { + OpalMCE_V1 = 1, +}; + +enum OpalMCE_Severity { + OpalMCE_SEV_NO_ERROR = 0, + OpalMCE_SEV_WARNING = 1, + OpalMCE_SEV_ERROR_SYNC = 2, + OpalMCE_SEV_FATAL = 3, +}; + +enum OpalMCE_Disposition { + OpalMCE_DISPOSITION_RECOVERED = 0, + OpalMCE_DISPOSITION_NOT_RECOVERED = 1, +}; + +enum OpalMCE_Initiator { + OpalMCE_INITIATOR_UNKNOWN = 0, + OpalMCE_INITIATOR_CPU = 1, +}; + +enum OpalMCE_ErrorType { + OpalMCE_ERROR_TYPE_UNKNOWN = 0, + OpalMCE_ERROR_TYPE_UE = 1, + OpalMCE_ERROR_TYPE_SLB = 2, + OpalMCE_ERROR_TYPE_ERAT = 3, + OpalMCE_ERROR_TYPE_TLB = 4, +}; + +enum OpalMCE_UeErrorType { + OpalMCE_UE_ERROR_INDETERMINATE = 0, + OpalMCE_UE_ERROR_IFETCH = 1, + OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH = 2, + OpalMCE_UE_ERROR_LOAD_STORE = 3, + OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE = 4, +}; + +enum OpalMCE_SlbErrorType { + OpalMCE_SLB_ERROR_INDETERMINATE = 0, + OpalMCE_SLB_ERROR_PARITY = 1, + OpalMCE_SLB_ERROR_MULTIHIT = 2, +}; + +enum OpalMCE_EratErrorType { + OpalMCE_ERAT_ERROR_INDETERMINATE = 0, + OpalMCE_ERAT_ERROR_PARITY = 1, + OpalMCE_ERAT_ERROR_MULTIHIT = 2, +}; + +enum OpalMCE_TlbErrorType { + OpalMCE_TLB_ERROR_INDETERMINATE = 0, + OpalMCE_TLB_ERROR_PARITY = 1, + OpalMCE_TLB_ERROR_MULTIHIT = 2, +}; + +enum OpalThreadStatus { + OPAL_THREAD_INACTIVE = 0x0, + OPAL_THREAD_STARTED = 0x1 +}; + +enum OpalPciBusCompare { + OpalPciBusAny = 0, /* Any bus number match */ + OpalPciBus3Bits = 2, /* Match top 3 bits of bus number */ + OpalPciBus4Bits = 3, /* Match top 4 bits of bus number */ + OpalPciBus5Bits = 4, /* Match top 5 bits of bus number */ + OpalPciBus6Bits = 5, /* Match top 6 bits of bus number */ + OpalPciBus7Bits = 6, /* Match top 7 bits of bus number */ + OpalPciBusAll = 7, /* Match bus number exactly */ +}; + +enum OpalDeviceCompare { + OPAL_IGNORE_RID_DEVICE_NUMBER = 0, + OPAL_COMPARE_RID_DEVICE_NUMBER = 1 +}; + +enum OpalFuncCompare { + OPAL_IGNORE_RID_FUNCTION_NUMBER = 0, + OPAL_COMPARE_RID_FUNCTION_NUMBER = 1 +}; + +enum OpalPeAction { + OPAL_UNMAP_PE = 0, + OPAL_MAP_PE = 1 +}; + +enum OpalPciResetAndReinitScope { + OPAL_PHB_COMPLETE = 1, OPAL_PCI_LINK = 2, OPAL_PHB_ERROR = 3, + OPAL_PCI_HOT_RESET = 4, OPAL_PCI_FUNDAMENTAL_RESET = 5, + OPAL_PCI_IODA_RESET = 6, +}; + +enum OpalPciResetState { OPAL_DEASSERT_RESET = 0, OPAL_ASSERT_RESET = 1 }; + +struct opal_machine_check_event { + enum OpalMCE_Version version:8; /* 0x00 */ + uint8_t in_use; /* 0x01 */ + enum OpalMCE_Severity severity:8; /* 0x02 */ + enum OpalMCE_Initiator initiator:8; /* 0x03 */ + enum OpalMCE_ErrorType error_type:8; /* 0x04 */ + enum OpalMCE_Disposition disposition:8; /* 0x05 */ + uint8_t reserved_1[2]; /* 0x06 */ + uint64_t gpr3; /* 0x08 */ + uint64_t srr0; /* 0x10 */ + uint64_t srr1; /* 0x18 */ + union { /* 0x20 */ + struct { + enum OpalMCE_UeErrorType ue_error_type:8; + uint8_t effective_address_provided; + uint8_t physical_address_provided; + uint8_t reserved_1[5]; + uint64_t effective_address; + uint64_t physical_address; + uint8_t reserved_2[8]; + } ue_error; + + struct { + enum OpalMCE_SlbErrorType slb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } slb_error; + + struct { + enum OpalMCE_EratErrorType erat_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + 
uint64_t effective_address; + uint8_t reserved_2[16]; + } erat_error; + + struct { + enum OpalMCE_TlbErrorType tlb_error_type:8; + uint8_t effective_address_provided; + uint8_t reserved_1[6]; + uint64_t effective_address; + uint8_t reserved_2[16]; + } tlb_error; + } u; +}; + +typedef struct oppanel_line { + /* XXX */ +} oppanel_line_t; + +/* API functions */ +int64_t opal_console_write(int64_t term_number, int64_t *length, + const uint8_t *buffer); +int64_t opal_console_read(int64_t term_number, int64_t *length, + uint8_t *buffer); +int64_t opal_console_write_buffer_space(int64_t term_number, + int64_t *length); +int64_t opal_rtc_read(uint32_t *year_month_day, + uint64_t *hour_minute_second_millisecond); +int64_t opal_rtc_write(uint32_t year_month_day, + uint64_t hour_minute_second_millisecond); +int64_t opal_cec_power_down(uint64_t request); +int64_t opal_cec_reboot(void); +int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset); +int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset); +int64_t opal_handle_interrupt(uint64_t isn, uint64_t *outstanding_event_mask); +int64_t opal_poll_events(uint64_t *outstanding_event_mask); +int64_t opal_pci_set_hub_tce_memory(uint64_t hub_id, uint64_t tce_mem_addr, + uint64_t tce_mem_size); +int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, uint64_t tce_mem_addr, + uint64_t tce_mem_size); +int64_t opal_pci_config_read_byte(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint8_t *data); +int64_t opal_pci_config_read_half_word(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint16_t *data); +int64_t opal_pci_config_read_word(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint32_t *data); +int64_t opal_pci_config_write_byte(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint8_t data); +int64_t opal_pci_config_write_half_word(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint16_t data); +int64_t opal_pci_config_write_word(uint64_t phb_id, uint64_t bus_dev_func, + uint64_t offset, uint32_t data); +int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority); +int64_t opal_get_xive(uint32_t isn, uint16_t *server, uint8_t *priority); +int64_t opal_register_exception_handler(uint64_t opal_exception, + uint64_t handler_address, + uint64_t glue_cache_line); +int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number, + uint8_t *freeze_state, + uint16_t *pci_error_type, + uint64_t *phb_status); +int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token); +int64_t opal_pci_shpc(uint64_t phb_id, uint64_t shpc_action, uint8_t *state); + + + +int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type, + uint16_t window_num, uint16_t enable); +int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, uint16_t window_type, + uint16_t window_num, + uint64_t starting_real_address, + uint64_t starting_pci_address, + uint16_t segment_size); +int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number, + uint16_t window_type, uint16_t window_num, + uint16_t segment_num); +int64_t opal_pci_set_phb_table_memory(uint64_t phb_id, uint64_t rtt_addr, + uint64_t ivt_addr, uint64_t ivt_len, + uint64_t reject_array_addr, + uint64_t peltv_addr); +int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, uint64_t bus_dev_func, + uint8_t bus_compare, uint8_t dev_compare, uint8_t func_compare, + uint8_t pe_action); +int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe, uint32_t child_pe, + uint8_t 
state); +int64_t opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number, uint32_t pe_number); +int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number, + uint32_t state); +int64_t opal_pci_get_xive_reissue(uint64_t phb_id, uint32_t xive_number, + uint8_t *p_bit, uint8_t *q_bit); +int64_t opal_pci_set_xive_reissue(uint64_t phb_id, uint32_t xive_number, + uint8_t p_bit, uint8_t q_bit); +int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number, + uint32_t xive_num); +int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num, + int32_t *interrupt_source_number); +int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, uint32_t xive_num, + uint8_t msi_range, uint32_t *msi_address, + uint32_t *message_data); +int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number, + uint32_t xive_num, uint8_t msi_range, + uint64_t *msi_address, uint32_t *message_data); +int64_t opal_start_cpu(uint64_t thread_number, uint64_t start_address); +int64_t opal_query_cpu_status(uint64_t thread_number, uint8_t *thread_status); +int64_t opal_write_oppanel(oppanel_line_t *lines, uint64_t num_lines); +int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, uint16_t window_id, + uint16_t tce_levels, uint64_t tce_table_addr, + uint64_t tce_table_size, uint64_t tce_page_size); +int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, uint16_t pe_number, + uint16_t dma_window_number, uint64_t pci_start_addr, + uint64_t pci_mem_size); +int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, uint8_t assert_state); + +/* Internal functions */ +extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); + +extern int opal_get_chars(uint32_t vtermno, char *buf, int count); +extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); + +extern void hvc_opal_init_early(void); + +/* Internal functions */ +extern int early_init_dt_scan_opal(unsigned long node, const char *uname, + int depth, void *data); + +#endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ -- cgit v1.2.3 From daea1175a9f0f70eab5b33e2827d57ba8c686816 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:59 +0000 Subject: powerpc/powernv: Support for OPAL console This adds a udbg and an hvc console backend for supporting a console using the OPAL console interfaces. On OPAL v1 we have hvc0 mapped to whatever console the system was configured for (network or hvsi serial port) via the service processor. On OPAL v2 we have hvcN mapped to the Nth console provided by OPAL which generally corresponds to: hvc0 : network console (raw protocol) hvc1 : serial port S1 (hvsi) hvc2 : serial port S2 (hvsi) Note: At this point, early debug console only works with OPAL v1 and shouldn't be enabled in a normal kernel. 
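As a rough sketch of how the hvc backend can sit on top of the two console calls exported below (hv_ops is the generic hvc console hook structure; this glue is an assumption for illustration, not the driver itself):

static int hvc_opal_raw_put_chars(uint32_t vtermno, const char *buf, int count)
{
	return opal_put_chars(vtermno, buf, count);
}

static int hvc_opal_raw_get_chars(uint32_t vtermno, char *buf, int count)
{
	return opal_get_chars(vtermno, buf, count);
}

static const struct hv_ops hvc_opal_raw_ops = {
	.get_chars = hvc_opal_raw_get_chars,
	.put_chars = hvc_opal_raw_put_chars,
};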
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 5 +++++ arch/powerpc/include/asm/udbg.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index c7a3202d10a..749de00a02d 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -425,6 +425,11 @@ extern void hvc_opal_init_early(void); extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); +extern int opal_get_chars(uint32_t vtermno, char *buf, int count); +extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); + +extern void hvc_opal_init_early(void); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 7cf796fa03f..6587ec7bc6e 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -55,6 +55,8 @@ extern void __init udbg_init_cpm(void); extern void __init udbg_init_usbgecko(void); extern void __init udbg_init_wsp(void); extern void __init udbg_init_ps3gelic(void); +extern void __init udbg_init_debug_opal_raw(void); +extern void __init udbg_init_debug_opal_hvsi(void); #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_UDBG_H */ -- cgit v1.2.3 From 628daa8d5abfd904a7329a660c5c374212230123 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:45:01 +0000 Subject: powerpc/powernv: Add RTC and NVRAM support plus RTAS fallbacks Implements OPAL RTC and NVRAM support and wire all that up to the powernv platform. We use RTAS for RTC as a fallback if available. Using RTAS for nvram is not supported yet, pending some rework/cleanup and generalization of the pSeries & CHRP code. We also use RTAS fallbacks for power off and reboot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 749de00a02d..77ebe50020a 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -430,6 +430,12 @@ extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); extern void hvc_opal_init_early(void); +struct rtc_time; +extern int opal_set_rtc_time(struct rtc_time *tm); +extern void opal_get_rtc_time(struct rtc_time *tm); +extern unsigned long opal_get_boot_time(void); +extern void opal_nvram_init(void); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ -- cgit v1.2.3 From 5c7c1e9444d8bfb721a27a35bba3eeb5236c75d8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:45:02 +0000 Subject: powerpc/powernv: Add OPAL ICS backend OPAL handles HW access to the various ICS or equivalent chips for us (with the exception of p5ioc2 based HEA which uses a different backend) similarily to what RTAS does on pSeries. 
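Conceptually the new backend routes the usual ICS operations through OPAL calls; a rough sketch of the unmask path using the opal_set_xive() API declared earlier in this series (server selection is simplified and the local names are assumptions):

static void ics_opal_unmask_irq(struct irq_data *d)
{
	unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
	int server = 0;		/* simplified: first CPU's interrupt server */
	int64_t rc;

	rc = opal_set_xive(hw_irq, server, DEFAULT_PRIORITY);
	if (rc != OPAL_SUCCESS)
		pr_err("%s: opal_set_xive(%#x) failed, rc=%lld\n",
		       __func__, hw_irq, (long long)rc);
}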
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/xics.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/xics.h b/arch/powerpc/include/asm/xics.h index b183a406201..bd6c401c0ee 100644 --- a/arch/powerpc/include/asm/xics.h +++ b/arch/powerpc/include/asm/xics.h @@ -27,10 +27,18 @@ #define MAX_NUM_PRIORITIES 3 /* Native ICP */ +#ifdef CONFIG_PPC_ICP_NATIVE extern int icp_native_init(void); +#else +static inline int icp_native_init(void) { return -ENODEV; } +#endif /* PAPR ICP */ +#ifdef CONFIG_PPC_ICP_HV extern int icp_hv_init(void); +#else +static inline int icp_hv_init(void) { return -ENODEV; } +#endif /* ICP ops */ struct icp_ops { @@ -51,7 +59,18 @@ extern const struct icp_ops *icp_ops; extern int ics_native_init(void); /* RTAS ICS */ +#ifdef CONFIG_PPC_ICS_RTAS extern int ics_rtas_init(void); +#else +static inline int ics_rtas_init(void) { return -ENODEV; } +#endif + +/* HAL ICS */ +#ifdef CONFIG_PPC_POWERNV +extern int ics_opal_init(void); +#else +static inline int ics_opal_init(void) { return -ENODEV; } +#endif /* ICS instance, hooked up to chip_data of an irq */ struct ics { -- cgit v1.2.3 From ed79ba9e15f84cef05aba5cbfe6e93f9b43c31f4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:45:04 +0000 Subject: powerpc/powernv: Machine check and other system interrupts OPAL can handle various interrupt for us such as Machine Checks (it performs all sorts of recovery tasks and passes back control to us with informations about the error), Hardware Management Interrupts and Softpatch interrupts. This wires up the mechanisms and prints out specific informations returned by HAL when a machine check occurs. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/include/asm/paca.h | 8 ++++++++ 2 files changed, 10 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 77ebe50020a..2893e8f5406 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -436,6 +436,8 @@ extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); +extern int opal_machine_check(struct pt_regs *regs); + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 516bfb3f47d..17722c73ba2 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -43,6 +43,7 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; +struct opal_machine_check_event; /* * Defines the layout of the paca. 
@@ -135,6 +136,13 @@ struct paca_struct { u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ +#ifdef CONFIG_PPC_POWERNV + /* Pointer to OPAL machine check event structure set by the + * early exception handler for use by high level C handler + */ + struct opal_machine_check_event *opal_mc_evt; +#endif + /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ u64 system_time; /* accumulated system TB ticks */ -- cgit v1.2.3 From 37caf9f2a1b99d11ba71e17168d221da9ca13f24 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 27 Aug 2011 06:14:23 -0500 Subject: powerpc/fsl-booke: Handle L1 D-cache parity error correctly on e500mc If the L1 D-Cache is in write shadow mode the HW will auto-recover the error. However we might still log the error and cause a machine check (if L1CSR0[CPE] - Cache error checking enable). We should only treat the non-write shadow case as non-recoverable. Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/reg_booke.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/powerpc/include/asm') diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 9ec0b39f9dd..28cdbd9f399 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -548,6 +548,9 @@ #define L1CSR1_ICFI 0x00000002 /* Instr Cache Flash Invalidate */ #define L1CSR1_ICE 0x00000001 /* Instr Cache Enable */ +/* Bit definitions for L1CSR2. */ +#define L1CSR2_DCWS 0x40000000 /* Data Cache write shadow */ + /* Bit definitions for L2CSR0. */ #define L2CSR0_L2E 0x80000000 /* L2 Cache Enable */ #define L2CSR0_L2PE 0x40000000 /* L2 Cache Parity/ECC Enable */ -- cgit v1.2.3
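As a closing illustration of the last patch above, a sketch of the check it enables in the e500mc machine check handler: a data cache parity error is only treated as unrecoverable when the L1 is not running in write shadow mode, which the new L1CSR2_DCWS bit lets the handler test (a sketch of the check only, not the full handler).

int machine_check_e500mc(struct pt_regs *regs)
{
	unsigned long mcsr = mfspr(SPRN_MCSR);
	int recoverable = 1;

	if (mcsr & MCSR_DCPERR_MC) {
		printk("Data Cache Parity Error\n");
		/* In write shadow mode the hardware restores the data from
		 * the shadow copy, so the error is only informational. */
		if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS))
			recoverable = 0;
	}

	return recoverable;
}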