From 674bfa485554156aa90ce17288712fcb568a42c3 Mon Sep 17 00:00:00 2001 From: Suzuki Poulose Date: Mon, 18 Jul 2011 03:29:20 +0000 Subject: powerpc/44x: Kexec support for PPC440X chipsets This patch adds kexec support for PPC440 based chipsets. This work is based on the KEXEC patches for FSL BookE. The FSL BookE patch and the code flow could be found at the link below: http://patchwork.ozlabs.org/patch/49359/ Steps: 1) Invalidate all the TLB entries except the one this code is run from 2) Create a tmp mapping for our code in the other address space and jump to it 3) Invalidate the entry we used 4) Create a 1:1 mapping for 0-2GiB in blocks of 256M 5) Jump to the new 1:1 mapping and invalidate the tmp mapping I have tested this patches on Ebony, Sequoia boards and Virtex on QEMU. You need kexec-tools commit e8b7939b1e or newer for ppc440x support, available at: git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git Signed-off-by: Suzuki Poulose Cc: Sebastian Andrzej Siewior Signed-off-by: Josh Boyer --- arch/powerpc/kernel/misc_32.S | 171 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 998a1002860..f7d760ab5ca 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,8 @@ * kexec bits: * Copyright (C) 2002-2003 Eric Biederman * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * PPC44x port. Copyright (C) 2011, IBM Corporation + * Author: Suzuki Poulose * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -735,6 +737,175 @@ relocate_new_kernel: mr r4, r30 mr r5, r31 + li r0, 0 +#elif defined(CONFIG_44x) && !defined(CONFIG_47x) + +/* + * Code for setting up 1:1 mapping for PPC440x for KEXEC + * + * We cannot switch off the MMU on PPC44x. + * So we: + * 1) Invalidate all the mappings except the one we are running from. + * 2) Create a tmp mapping for our code in the other address space(TS) and + * jump to it. Invalidate the entry we started in. + * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. + * 4) Jump to the 1:1 mapping in original TS. + * 5) Invalidate the tmp mapping. + * + * - Based on the kexec support code for FSL BookE + * - Doesn't support 47x yet. + * + */ + /* Save our parameters */ + mr r29, r3 + mr r30, r4 + mr r31, r5 + + /* Load our MSR_IS and TID to MMUCR for TLB search */ + mfspr r3,SPRN_PID + mfmsr r4 + andi. r4,r4,MSR_IS@l + beq wmmucr + oris r3,r3,PPC44x_MMUCR_STS@h +wmmucr: + mtspr SPRN_MMUCR,r3 + sync + + /* + * Invalidate all the TLB entries except the current entry + * where we are running from + */ + bl 0f /* Find our address */ +0: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skip /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skip: + addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync + + /* Create a temp mapping and jump to it */ + andi. r6, r23, 1 /* Find the index to use */ + addi r24, r6, 1 /* r24 will contain 1 or 2 */ + + mfmsr r9 /* get the MSR */ + rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ + xori r7, r5, 1 /* Use the other address space */ + + /* Read the current mapping entries */ + tlbre r3, r23, PPC44x_TLB_PAGEID + tlbre r4, r23, PPC44x_TLB_XLAT + tlbre r5, r23, PPC44x_TLB_ATTRIB + + /* Save our current XLAT entry */ + mr r25, r4 + + /* Extract the TLB PageSize */ + li r10, 1 /* r10 will hold PageSize */ + rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ + + /* XXX: As of now we use 256M, 4K pages */ + cmpwi r11, PPC44x_TLB_256M + bne tlb_4k + rotlwi r10, r10, 28 /* r10 = 256M */ + b write_out +tlb_4k: + cmpwi r11, PPC44x_TLB_4K + bne default + rotlwi r10, r10, 12 /* r10 = 4K */ + b write_out +default: + rotlwi r10, r10, 10 /* r10 = 1K */ + +write_out: + /* + * Write out the tmp 1:1 mapping for this code in other address space + * Fixup EPN = RPN , TS=other address space + */ + insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ + + /* Write out the tmp mapping entries */ + tlbwe r3, r24, PPC44x_TLB_PAGEID + tlbwe r4, r24, PPC44x_TLB_XLAT + tlbwe r5, r24, PPC44x_TLB_ATTRIB + + subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ + not r10, r11 /* Mask for PageNum */ + + /* Switch to other address space in MSR */ + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + addi r8, r8, (2f-1b) /* Find the target offset */ + + /* Jump to the tmp mapping */ + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi + +2: + /* Invalidate the entry we were executing from */ + li r3, 0 + tlbwe r3, r23, PPC44x_TLB_PAGEID + + /* attribute fields. rwx for SUPERVISOR mode */ + li r5, 0 + ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + /* Create 1:1 mapping in 256M pages */ + xori r7, r7, 1 /* Revert back to Original TS */ + + li r8, 0 /* PageNumber */ + li r6, 3 /* TLB Index, start at 3 */ + +next_tlb: + rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ + mr r4, r3 /* RPN = EPN */ + ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ + insrwi r3, r7, 1, 23 /* Set TS from r7 */ + + tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ + tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ + tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ + + addi r8, r8, 1 /* Increment PN */ + addi r6, r6, 1 /* Increment TLB Index */ + cmpwi r8, 8 /* Are we done ? */ + bne next_tlb + isync + + /* Jump to the new mapping 1:1 */ + li r9,0 + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + and r8, r8, r11 /* Get our offset within page */ + addi r8, r8, (2f-1b) + + and r5, r25, r10 /* Get our target PageNum */ + or r8, r8, r5 /* Target jump address */ + + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi +2: + /* Invalidate the tmp entry we used */ + li r3, 0 + tlbwe r3, r24, PPC44x_TLB_PAGEID + sync + + /* Restore the parameters */ + mr r3, r29 + mr r4, r30 + mr r5, r31 + li r0, 0 #else li r0, 0 -- cgit v1.2.3 From 6a5c7be5e484bda5b2639fedf7dbe3f25c15c962 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 24 Jun 2011 09:05:22 +0000 Subject: powerpc: Override dma_get_required_mask by platform hook and ops The hook dma_get_required_mask is supposed to return the mask required by the platform to operate efficently. The generic version of dma_get_required_mask in driver/base/platform.c returns a mask based only on max_pfn. However, this is likely too big for iommu systems and could be too small for platforms that require a dma offset or have a secondary window at a high offset. Override the default, provide a hook in ppc_md used by pseries lpar and cell, and provide the default answer based on memblock_end_of_DRAM(), with hooks for get_dma_offset, and provide an implementation for iommu that looks at the defined table size. Coverting from the end address to the required bit mask is based on the generic implementation. The need for this was discovered when the qla2xxx driver switched to 64 bit dma then reverted to 32 bit when dma_get_required_mask said 32 bits was sufficient. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 13 +++++++++++++ arch/powerpc/kernel/dma.c | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index e7554154a6d..1f2a711a261 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -90,6 +90,19 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } +u64 dma_iommu_get_required_mask(struct device *dev) +{ + struct iommu_table *tbl = get_iommu_table_base(dev); + u64 mask; + if (!tbl) + return 0; + + mask = 1ULL < (fls_long(tbl->it_offset + tbl->it_size) - 1); + mask += mask - 1; + + return mask; +} + struct dma_map_ops dma_iommu_ops = { .alloc_coherent = dma_iommu_alloc_coherent, .free_coherent = dma_iommu_free_coherent, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 4f0959fbfbe..503093efa20 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -170,6 +170,45 @@ int dma_set_mask(struct device *dev, u64 dma_mask) } EXPORT_SYMBOL(dma_set_mask); +u64 dma_get_required_mask(struct device *dev) +{ + struct dma_map_ops *dma_ops = get_dma_ops(dev); + u64 mask, end = 0; + + if (ppc_md.dma_get_required_mask) + return ppc_md.dma_get_required_mask(dev); + + if (unlikely(dma_ops == NULL)) + return 0; + +#ifdef CONFIG_PPC64 + else if (dma_ops == &dma_iommu_ops) + return dma_iommu_get_required_mask(dev); +#endif +#ifdef CONFIG_SWIOTLB + else if (dma_ops == &swiotlb_dma_ops) { + u64 max_direct_dma_addr = dev->archdata.max_direct_dma_addr; + + end = memblock_end_of_DRAM(); + if (max_direct_dma_addr && end > max_direct_dma_addr) + end = max_direct_dma_addr; + end += get_dma_offset(dev); + } +#endif + else if (dma_ops == &dma_direct_ops) + end = memblock_end_of_DRAM() + get_dma_offset(dev); + else { + WARN_ONCE(1, "%s: unknown ops %p\n", __func__, dma_ops); + end = memblock_end_of_DRAM(); + } + + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); + static int __init dma_init(void) { dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); -- cgit v1.2.3 From d24f9c6999eacd3a7bc2b289e49fcb2bf2fafef2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 24 Jun 2011 09:05:24 +0000 Subject: powerpc: Use the newly added get_required_mask dma_map_ops hook Now that the generic code has dma_map_ops set, instead of having a messy ifdef & if block in the base dma_get_required_mask hook push the computation into the dma ops. If the ops fails to set the get_required_mask hook default to the width of dma_addr_t. This also corrects ibmbus ibmebus_dma_supported to require a 64 bit mask. I doubt anything is checking or setting the dma mask on that bus. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 3 ++- arch/powerpc/kernel/dma-swiotlb.c | 16 +++++++++++++++ arch/powerpc/kernel/dma.c | 41 +++++++++++++++------------------------ arch/powerpc/kernel/ibmebus.c | 8 +++++++- arch/powerpc/kernel/vio.c | 7 ++++++- 5 files changed, 47 insertions(+), 28 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 1f2a711a261..c1ad9db934f 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -90,7 +90,7 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) return 1; } -u64 dma_iommu_get_required_mask(struct device *dev) +static u64 dma_iommu_get_required_mask(struct device *dev) { struct iommu_table *tbl = get_iommu_table_base(dev); u64 mask; @@ -111,5 +111,6 @@ struct dma_map_ops dma_iommu_ops = { .dma_supported = dma_iommu_dma_supported, .map_page = dma_iommu_map_page, .unmap_page = dma_iommu_unmap_page, + .get_required_mask = dma_iommu_get_required_mask, }; EXPORT_SYMBOL(dma_iommu_ops); diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 4295e0b94b2..1ebc9189aad 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -24,6 +24,21 @@ unsigned int ppc_swiotlb_enable; +static u64 swiotlb_powerpc_get_required(struct device *dev) +{ + u64 end, mask, max_direct_dma_addr = dev->archdata.max_direct_dma_addr; + + end = memblock_end_of_DRAM(); + if (max_direct_dma_addr && end > max_direct_dma_addr) + end = max_direct_dma_addr; + end += get_dma_offset(dev); + + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + /* * At the moment, all platforms that use this code only require * swiotlb to be used if we're operating on HIGHMEM. Since @@ -44,6 +59,7 @@ struct dma_map_ops swiotlb_dma_ops = { .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .mapping_error = swiotlb_dma_mapping_error, + .get_required_mask = swiotlb_powerpc_get_required, }; void pci_dma_dev_setup_swiotlb(struct pci_dev *pdev) diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 503093efa20..10b136afbf5 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -96,6 +96,18 @@ static int dma_direct_dma_supported(struct device *dev, u64 mask) #endif } +static u64 dma_direct_get_required_mask(struct device *dev) +{ + u64 end, mask; + + end = memblock_end_of_DRAM() + get_dma_offset(dev); + + mask = 1ULL << (fls64(end) - 1); + mask += mask - 1; + + return mask; +} + static inline dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, unsigned long offset, @@ -144,6 +156,7 @@ struct dma_map_ops dma_direct_ops = { .dma_supported = dma_direct_dma_supported, .map_page = dma_direct_map_page, .unmap_page = dma_direct_unmap_page, + .get_required_mask = dma_direct_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_direct_sync_single, .sync_single_for_device = dma_direct_sync_single, @@ -173,7 +186,6 @@ EXPORT_SYMBOL(dma_set_mask); u64 dma_get_required_mask(struct device *dev) { struct dma_map_ops *dma_ops = get_dma_ops(dev); - u64 mask, end = 0; if (ppc_md.dma_get_required_mask) return ppc_md.dma_get_required_mask(dev); @@ -181,31 +193,10 @@ u64 dma_get_required_mask(struct device *dev) if (unlikely(dma_ops == NULL)) return 0; -#ifdef CONFIG_PPC64 - else if (dma_ops == &dma_iommu_ops) - return dma_iommu_get_required_mask(dev); -#endif -#ifdef CONFIG_SWIOTLB - else if (dma_ops == &swiotlb_dma_ops) { - u64 max_direct_dma_addr = dev->archdata.max_direct_dma_addr; - - end = memblock_end_of_DRAM(); - if (max_direct_dma_addr && end > max_direct_dma_addr) - end = max_direct_dma_addr; - end += get_dma_offset(dev); - } -#endif - else if (dma_ops == &dma_direct_ops) - end = memblock_end_of_DRAM() + get_dma_offset(dev); - else { - WARN_ONCE(1, "%s: unknown ops %p\n", __func__, dma_ops); - end = memblock_end_of_DRAM(); - } + if (dma_ops->get_required_mask) + return dma_ops->get_required_mask(dev); - mask = 1ULL << (fls64(end) - 1); - mask += mask - 1; - - return mask; + return DMA_BIT_MASK(8 * sizeof(dma_addr_t)); } EXPORT_SYMBOL_GPL(dma_get_required_mask); diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 28581f1ad2c..90ef2a44613 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -125,7 +125,12 @@ static void ibmebus_unmap_sg(struct device *dev, static int ibmebus_dma_supported(struct device *dev, u64 mask) { - return 1; + return mask == DMA_BIT_MASK(64); +} + +static u64 ibmebus_dma_get_required_mask(struct device *dev) +{ + return DMA_BIT_MASK(64); } static struct dma_map_ops ibmebus_dma_ops = { @@ -134,6 +139,7 @@ static struct dma_map_ops ibmebus_dma_ops = { .map_sg = ibmebus_map_sg, .unmap_sg = ibmebus_unmap_sg, .dma_supported = ibmebus_dma_supported, + .get_required_mask = ibmebus_dma_get_required_mask, .map_page = ibmebus_map_page, .unmap_page = ibmebus_unmap_page, }; diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index 1b695fdc362..c0493259d13 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -605,6 +605,11 @@ static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask) return dma_iommu_ops.dma_supported(dev, mask); } +static u64 vio_dma_get_required_mask(struct device *dev) +{ + return dma_iommu_ops.get_required_mask(dev); +} + struct dma_map_ops vio_dma_mapping_ops = { .alloc_coherent = vio_dma_iommu_alloc_coherent, .free_coherent = vio_dma_iommu_free_coherent, @@ -613,7 +618,7 @@ struct dma_map_ops vio_dma_mapping_ops = { .map_page = vio_dma_iommu_map_page, .unmap_page = vio_dma_iommu_unmap_page, .dma_supported = vio_dma_iommu_dma_supported, - + .get_required_mask = vio_dma_get_required_mask, }; /** -- cgit v1.2.3 From 2eccacd0974dca73e2151d3fd4c2dacf1a5c7cc2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 24 Jun 2011 09:05:25 +0000 Subject: powerpc: Tidy up dma_map_ops after adding new hook The new get_required_mask hook name is longer than many of but not all of the prior ops. Tidy the struct initializers to align the equal signs using the local whitespace. Signed-off-by: Milton Miller Signed-off-by: Nishanth Aravamudan Cc: linuxppc-dev@lists.ozlabs.org Cc: linux-kernel@vger.kernel.org Cc: benh@kernel.crashing.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/dma-iommu.c | 14 +++++++------- arch/powerpc/kernel/dma.c | 16 ++++++++-------- arch/powerpc/kernel/ibmebus.c | 14 +++++++------- arch/powerpc/kernel/vio.c | 14 +++++++------- 4 files changed, 29 insertions(+), 29 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index c1ad9db934f..6f04b9c383a 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -104,13 +104,13 @@ static u64 dma_iommu_get_required_mask(struct device *dev) } struct dma_map_ops dma_iommu_ops = { - .alloc_coherent = dma_iommu_alloc_coherent, - .free_coherent = dma_iommu_free_coherent, - .map_sg = dma_iommu_map_sg, - .unmap_sg = dma_iommu_unmap_sg, - .dma_supported = dma_iommu_dma_supported, - .map_page = dma_iommu_map_page, - .unmap_page = dma_iommu_unmap_page, + .alloc_coherent = dma_iommu_alloc_coherent, + .free_coherent = dma_iommu_free_coherent, + .map_sg = dma_iommu_map_sg, + .unmap_sg = dma_iommu_unmap_sg, + .dma_supported = dma_iommu_dma_supported, + .map_page = dma_iommu_map_page, + .unmap_page = dma_iommu_unmap_page, .get_required_mask = dma_iommu_get_required_mask, }; EXPORT_SYMBOL(dma_iommu_ops); diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 10b136afbf5..8593f53c4f6 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -149,14 +149,14 @@ static inline void dma_direct_sync_single(struct device *dev, #endif struct dma_map_ops dma_direct_ops = { - .alloc_coherent = dma_direct_alloc_coherent, - .free_coherent = dma_direct_free_coherent, - .map_sg = dma_direct_map_sg, - .unmap_sg = dma_direct_unmap_sg, - .dma_supported = dma_direct_dma_supported, - .map_page = dma_direct_map_page, - .unmap_page = dma_direct_unmap_page, - .get_required_mask = dma_direct_get_required_mask, + .alloc_coherent = dma_direct_alloc_coherent, + .free_coherent = dma_direct_free_coherent, + .map_sg = dma_direct_map_sg, + .unmap_sg = dma_direct_unmap_sg, + .dma_supported = dma_direct_dma_supported, + .map_page = dma_direct_map_page, + .unmap_page = dma_direct_unmap_page, + .get_required_mask = dma_direct_get_required_mask, #ifdef CONFIG_NOT_COHERENT_CACHE .sync_single_for_cpu = dma_direct_sync_single, .sync_single_for_device = dma_direct_sync_single, diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c index 90ef2a44613..73110fb6bb6 100644 --- a/arch/powerpc/kernel/ibmebus.c +++ b/arch/powerpc/kernel/ibmebus.c @@ -134,14 +134,14 @@ static u64 ibmebus_dma_get_required_mask(struct device *dev) } static struct dma_map_ops ibmebus_dma_ops = { - .alloc_coherent = ibmebus_alloc_coherent, - .free_coherent = ibmebus_free_coherent, - .map_sg = ibmebus_map_sg, - .unmap_sg = ibmebus_unmap_sg, - .dma_supported = ibmebus_dma_supported, + .alloc_coherent = ibmebus_alloc_coherent, + .free_coherent = ibmebus_free_coherent, + .map_sg = ibmebus_map_sg, + .unmap_sg = ibmebus_unmap_sg, + .dma_supported = ibmebus_dma_supported, .get_required_mask = ibmebus_dma_get_required_mask, - .map_page = ibmebus_map_page, - .unmap_page = ibmebus_unmap_page, + .map_page = ibmebus_map_page, + .unmap_page = ibmebus_unmap_page, }; static int ibmebus_match_path(struct device *dev, void *data) diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index c0493259d13..34d291d83ec 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -611,13 +611,13 @@ static u64 vio_dma_get_required_mask(struct device *dev) } struct dma_map_ops vio_dma_mapping_ops = { - .alloc_coherent = vio_dma_iommu_alloc_coherent, - .free_coherent = vio_dma_iommu_free_coherent, - .map_sg = vio_dma_iommu_map_sg, - .unmap_sg = vio_dma_iommu_unmap_sg, - .map_page = vio_dma_iommu_map_page, - .unmap_page = vio_dma_iommu_unmap_page, - .dma_supported = vio_dma_iommu_dma_supported, + .alloc_coherent = vio_dma_iommu_alloc_coherent, + .free_coherent = vio_dma_iommu_free_coherent, + .map_sg = vio_dma_iommu_map_sg, + .unmap_sg = vio_dma_iommu_unmap_sg, + .map_page = vio_dma_iommu_map_page, + .unmap_page = vio_dma_iommu_unmap_page, + .dma_supported = vio_dma_iommu_dma_supported, .get_required_mask = vio_dma_get_required_mask, }; -- cgit v1.2.3 From 7df5659eefad9b6d457ccdee016bd78bd064cfc0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Jun 2011 11:45:16 +0000 Subject: serial/8250: Move UPIO_TSI to powerpc This iotype is only used by the legacy_serial code in powerpc, so the code should live there, rather than be compiled in for every 8250 driver. Signed-off-by: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: linuxppc-dev@lists.ozlabs.org Cc: Greg Kroah-Hartman Cc: linux-serial@vger.kernel.org Acked-by: David Daney Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/legacy_serial.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 2b97b80d6d7..c7b5afeecaf 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,24 @@ static struct __initdata of_device_id legacy_serial_parents[] = { static unsigned int legacy_serial_count; static int legacy_serial_console = -1; +static unsigned int tsi_serial_in(struct uart_port *p, int offset) +{ + unsigned int tmp; + offset = offset << p->regshift; + if (offset == UART_IIR) { + tmp = readl(p->membase + (UART_IIR & ~3)); + return (tmp >> 16) & 0xff; /* UART_IIR % 4 == 2 */ + } else + return readb(p->membase + offset); +} + +static void tsi_serial_out(struct uart_port *p, int offset, int value) +{ + offset = offset << p->regshift; + if (!((offset == UART_IER) && (value & UART_IER_UUE))) + writeb(value, p->membase + offset); +} + static int __init add_legacy_port(struct device_node *np, int want_index, int iotype, phys_addr_t base, phys_addr_t taddr, unsigned long irq, @@ -102,6 +121,7 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_ports[index].iobase = base; else legacy_serial_ports[index].mapbase = base; + legacy_serial_ports[index].iotype = iotype; legacy_serial_ports[index].uartclk = clock; legacy_serial_ports[index].irq = irq; @@ -112,6 +132,11 @@ static int __init add_legacy_port(struct device_node *np, int want_index, legacy_serial_infos[index].speed = spd ? be32_to_cpup(spd) : 0; legacy_serial_infos[index].irq_check_parent = irq_check_parent; + if (iotype == UPIO_TSI) { + legacy_serial_ports[index].serial_in = tsi_serial_in; + legacy_serial_ports[index].serial_out = tsi_serial_out; + } + printk(KERN_DEBUG "Found legacy serial port %d for %s\n", index, np->full_name); printk(KERN_DEBUG " %s=%llx, taddr=%llx, irq=%lx, clk=%d, speed=%d\n", -- cgit v1.2.3 From 41151e77a4d96ea138cede6d84c955aa4769ce74 Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Tue, 28 Jun 2011 09:54:48 +0000 Subject: powerpc: Hugetlb for BookE Enable hugepages on Freescale BookE processors. This allows the kernel to use huge TLB entries to map pages, which can greatly reduce the number of TLB misses and the amount of TLB thrashing experienced by applications with large memory footprints. Care should be taken when using this on FSL processors, as the number of large TLB entries supported by the core is low (16-64) on current processors. The supported set of hugepage sizes include 4m, 16m, 64m, 256m, and 1g. Page sizes larger than the max zone size are called "gigantic" pages and must be allocated on the command line (and cannot be deallocated). This is currently only fully implemented for Freescale 32-bit BookE processors, but there is some infrastructure in the code for 64-bit BooKE. Signed-off-by: Becky Bruce Signed-off-by: David Gibson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_fsl_booke.S | 133 ++++++++++++++++++++++++++++++----- 1 file changed, 114 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 50845924b7d..4ea9bfbf67e 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -236,8 +236,24 @@ _ENTRY(__early_start) * if we find the pte (fall through): * r11 is low pte word * r12 is pointer to the pte + * r10 is the pshift from the PGD, if we're a hugepage */ #ifdef CONFIG_PTE_64BIT +#ifdef CONFIG_HUGETLB_PAGE +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + blt 1000f; /* Normal non-huge page */ \ + beq 2f; /* Bail if no table */ \ + oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ + andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ + xor r12, r10, r11; /* drop size bits from pointer */ \ + b 1001f; \ +1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + li r10, 0; /* clear r10 */ \ +1001: lwz r11, 4(r12); /* Get pte entry */ +#else #define FIND_PTE \ rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ @@ -245,7 +261,8 @@ _ENTRY(__early_start) beq 2f; /* Bail if no table */ \ rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ -#else +#endif /* HUGEPAGE */ +#else /* !PTE_64BIT */ #define FIND_PTE \ rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ @@ -402,8 +419,8 @@ interrupt_base: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP - subf r10,r11,r12 /* create false data dep */ - lwzx r13,r11,r10 /* Get upper pte bits */ + subf r13,r11,r12 /* create false data dep */ + lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif @@ -483,8 +500,8 @@ interrupt_base: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_SMP - subf r10,r11,r12 /* create false data dep */ - lwzx r13,r11,r10 /* Get upper pte bits */ + subf r13,r11,r12 /* create false data dep */ + lwzx r13,r11,r13 /* Get upper pte bits */ #else lwz r13,0(r12) /* Get upper pte bits */ #endif @@ -548,7 +565,7 @@ interrupt_base: /* * Both the instruction and data TLB miss get to this * point to load the TLB. - * r10 - available to use + * r10 - tsize encoding (if HUGETLB_PAGE) or available to use * r11 - TLB (info from Linux PTE) * r12 - available to use * r13 - upper bits of PTE (if PTE_64BIT) or available to use @@ -558,21 +575,73 @@ interrupt_base: * Upon exit, we reload everything and RFI. */ finish_tlb_load: +#ifdef CONFIG_HUGETLB_PAGE + cmpwi 6, r10, 0 /* check for huge page */ + beq 6, finish_tlb_load_cont /* !huge */ + + /* Alas, we need more scratch registers for hugepages */ + mfspr r12, SPRN_SPRG_THREAD + stw r14, THREAD_NORMSAVE(4)(r12) + stw r15, THREAD_NORMSAVE(5)(r12) + stw r16, THREAD_NORMSAVE(6)(r12) + stw r17, THREAD_NORMSAVE(7)(r12) + + /* Get the next_tlbcam_idx percpu var */ +#ifdef CONFIG_SMP + lwz r12, THREAD_INFO-THREAD(r12) + lwz r15, TI_CPU(r12) + lis r14, __per_cpu_offset@h + ori r14, r14, __per_cpu_offset@l + rlwinm r15, r15, 2, 0, 29 + lwzx r16, r14, r15 +#else + li r16, 0 +#endif + lis r17, next_tlbcam_idx@h + ori r17, r17, next_tlbcam_idx@l + add r17, r17, r16 /* r17 = *next_tlbcam_idx */ + lwz r15, 0(r17) /* r15 = next_tlbcam_idx */ + + lis r14, MAS0_TLBSEL(1)@h /* select TLB1 (TLBCAM) */ + rlwimi r14, r15, 16, 4, 15 /* next_tlbcam_idx entry */ + mtspr SPRN_MAS0, r14 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r16, SPRN_TLB1CFG + andi. r16, r16, 0xfff + + /* Update next_tlbcam_idx, wrapping when necessary */ + addi r15, r15, 1 + cmpw r15, r16 + blt 100f + lis r14, tlbcam_index@h + ori r14, r14, tlbcam_index@l + lwz r15, 0(r14) +100: stw r15, 0(r17) + + /* + * Calc MAS1_TSIZE from r10 (which has pshift encoded) + * tlb_enc = (pshift - 10). + */ + subi r15, r10, 10 + mfspr r16, SPRN_MAS1 + rlwimi r16, r15, 7, 20, 24 + mtspr SPRN_MAS1, r16 + + /* copy the pshift for use later */ + mr r14, r10 + + /* fall through */ + +#endif /* CONFIG_HUGETLB_PAGE */ + /* * We set execute, because we don't have the granularity to * properly set this at the page level (Linux problem). * Many of these bits are software only. Bits we don't set * here we (properly should) assume have the appropriate value. */ - - mfspr r12, SPRN_MAS2 -#ifdef CONFIG_PTE_64BIT - rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ -#else - rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ -#endif - mtspr SPRN_MAS2, r12 - +finish_tlb_load_cont: #ifdef CONFIG_PTE_64BIT rlwinm r12, r11, 32-2, 26, 31 /* Move in perm bits */ andi. r10, r11, _PAGE_DIRTY @@ -581,22 +650,40 @@ finish_tlb_load: andc r12, r12, r10 1: rlwimi r12, r13, 20, 0, 11 /* grab RPN[32:43] */ rlwimi r12, r11, 20, 12, 19 /* grab RPN[44:51] */ - mtspr SPRN_MAS3, r12 +2: mtspr SPRN_MAS3, r12 BEGIN_MMU_FTR_SECTION srwi r10, r13, 12 /* grab RPN[12:31] */ mtspr SPRN_MAS7, r10 END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) #else li r10, (_PAGE_EXEC | _PAGE_PRESENT) + mr r13, r11 rlwimi r10, r11, 31, 29, 29 /* extract _PAGE_DIRTY into SW */ and r12, r11, r10 andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ slwi r10, r12, 1 or r10, r10, r12 iseleq r12, r12, r10 - rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ - mtspr SPRN_MAS3, r11 + rlwimi r13, r12, 0, 20, 31 /* Get RPN from PTE, merge w/ perms */ + mtspr SPRN_MAS3, r13 #endif + + mfspr r12, SPRN_MAS2 +#ifdef CONFIG_PTE_64BIT + rlwimi r12, r11, 32-19, 27, 31 /* extract WIMGE from pte */ +#else + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ +#endif +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 3f /* don't mask if page isn't huge */ + li r13, 1 + slw r13, r13, r14 + subi r13, r13, 1 + rlwinm r13, r13, 0, 0, 19 /* bottom bits used for WIMGE/etc */ + andc r12, r12, r13 /* mask off ea bits within the page */ +#endif +3: mtspr SPRN_MAS2, r12 + #ifdef CONFIG_E200 /* Round robin TLB1 entries assignment */ mfspr r12, SPRN_MAS0 @@ -622,11 +709,19 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) mtspr SPRN_MAS0,r12 #endif /* CONFIG_E200 */ +tlb_write_entry: tlbwe /* Done...restore registers and get out of here. */ mfspr r10, SPRN_SPRG_THREAD - lwz r11, THREAD_NORMSAVE(3)(r10) +#ifdef CONFIG_HUGETLB_PAGE + beq 6, 8f /* skip restore for 4k page faults */ + lwz r14, THREAD_NORMSAVE(4)(r10) + lwz r15, THREAD_NORMSAVE(5)(r10) + lwz r16, THREAD_NORMSAVE(6)(r10) + lwz r17, THREAD_NORMSAVE(7)(r10) +#endif +8: lwz r11, THREAD_NORMSAVE(3)(r10) mtcr r11 lwz r13, THREAD_NORMSAVE(2)(r10) lwz r12, THREAD_NORMSAVE(1)(r10) -- cgit v1.2.3 From 6dece0eb69b2a28e18d104bc5d707f1cb673f5e0 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 25 Jul 2011 11:29:33 +0000 Subject: powerpc/32: Pass device tree address as u64 to machine_init u64 is used rather than phys_addr_t to keep things simple, as this is called from assembly code. Update callers to pass a 64-bit address in r3/r4. Other unused register assignments that were once parameters to machine_init are dropped. For FSL BookE, look up the physical address of the device tree from the effective address passed in r3 by the loader. This is required for situations where memory does not start at zero (due to AMP or IOMMU-less virtualization), and thus the IMA doesn't start at zero, and thus the device tree effective address does not equal the physical address. Signed-off-by: Scott Wood Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_32.S | 7 +++--- arch/powerpc/kernel/head_40x.S | 15 +++---------- arch/powerpc/kernel/head_44x.S | 16 +++----------- arch/powerpc/kernel/head_8xx.S | 13 +++-------- arch/powerpc/kernel/head_fsl_booke.S | 42 ++++++++++++++++++++++-------------- arch/powerpc/kernel/setup_32.c | 2 +- 6 files changed, 39 insertions(+), 56 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index ba250d505e0..0654dba2c1f 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -139,8 +139,7 @@ __start: trap #endif /* CONFIG_PPC_PMAC */ -1: mr r31,r3 /* save parameters */ - mr r30,r4 +1: mr r31,r3 /* save device tree ptr */ li r24,0 /* cpu # */ /* @@ -964,8 +963,8 @@ start_here: * Do early platform-specific initialization, * and set up the MMU. */ - mr r3,r31 - mr r4,r30 + li r3,0 + mr r4,r31 bl machine_init bl __save_cpu_setup bl MMU_init diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a91626d87fc..872a6af83ba 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -58,13 +58,7 @@ _ENTRY(_stext); _ENTRY(_start); - /* Save parameters we are passed. - */ - mr r31,r3 - mr r30,r4 - mr r29,r5 - mr r28,r6 - mr r27,r7 + mr r31,r3 /* save device tree ptr */ /* We have to turn on the MMU right away so we get cache modes * set correctly. @@ -849,11 +843,8 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ - mr r3,r31 - mr r4,r30 - mr r5,r29 - mr r6,r28 - mr r7,r27 + li r3,0 + mr r4,r31 bl machine_init bl MMU_init diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index f8e971ba94f..b725dab0f88 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -61,14 +61,7 @@ _ENTRY(_start); * of abatron_pteptrs */ nop -/* - * Save parameters we are passed - */ - mr r31,r3 - mr r30,r4 - mr r29,r5 - mr r28,r6 - mr r27,r7 + mr r31,r3 /* save device tree ptr */ li r24,0 /* CPU number */ bl init_cpu_state @@ -120,11 +113,8 @@ _ENTRY(_start); /* * Decide what sort of machine this is and initialize the MMU. */ - mr r3,r31 - mr r4,r30 - mr r5,r29 - mr r6,r28 - mr r7,r27 + li r3,0 + mr r4,r31 bl machine_init bl MMU_init diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1cbf64e6b41..b68cb173ba2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -76,11 +76,7 @@ _ENTRY(_start); */ .globl __start __start: - mr r31,r3 /* save parameters */ - mr r30,r4 - mr r29,r5 - mr r28,r6 - mr r27,r7 + mr r31,r3 /* save device tree ptr */ /* We have to turn on the MMU right away so we get cache modes * set correctly. @@ -723,11 +719,8 @@ start_here: /* * Decide what sort of machine this is and initialize the MMU. */ - mr r3,r31 - mr r4,r30 - mr r5,r29 - mr r6,r28 - mr r7,r27 + li r3,0 + mr r4,r31 bl machine_init bl MMU_init diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 4ea9bfbf67e..e1c699f3b7a 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -63,17 +63,30 @@ _ENTRY(_start); * of abatron_pteptrs */ nop -/* - * Save parameters we are passed - */ - mr r31,r3 - mr r30,r4 - mr r29,r5 - mr r28,r6 - mr r27,r7 - li r25,0 /* phys kernel start (low) */ - li r24,0 /* CPU number */ - li r23,0 /* phys kernel start (high) */ + + /* Translate device tree address to physical, save in r30/r31 */ + mfmsr r16 + mfspr r17,SPRN_PID + rlwinm r17,r17,16,0x3fff0000 /* turn PID into MAS6[SPID] */ + rlwimi r17,r16,28,0x00000001 /* turn MSR[DS] into MAS6[SAS] */ + mtspr SPRN_MAS6,r17 + + tlbsx 0,r3 /* must succeed */ + + mfspr r16,SPRN_MAS1 + mfspr r20,SPRN_MAS3 + rlwinm r17,r16,25,0x1f /* r17 = log2(page size) */ + li r18,1024 + slw r18,r18,r17 /* r18 = page size */ + addi r18,r18,-1 + and r19,r3,r18 /* r19 = page offset */ + andc r31,r20,r18 /* r3 = page base */ + or r31,r31,r19 /* r3 = devtree phys addr */ + mfspr r30,SPRN_MAS7 + + li r25,0 /* phys kernel start (low) */ + li r24,0 /* CPU number */ + li r23,0 /* phys kernel start (high) */ /* We try to not make any assumptions about how the boot loader * setup or used the TLBs. We invalidate all mappings from the @@ -198,11 +211,8 @@ _ENTRY(__early_start) /* * Decide what sort of machine this is and initialize the MMU. */ - mr r3,r31 - mr r4,r30 - mr r5,r29 - mr r6,r28 - mr r7,r27 + mr r3,r30 + mr r4,r31 bl machine_init bl MMU_init diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 209135af0a4..c1ce86357ec 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -117,7 +117,7 @@ notrace unsigned long __init early_init(unsigned long dt_ptr) * This is called very early on the boot process, after a minimal * MMU environment has been set up but before MMU_init is called. */ -notrace void __init machine_init(unsigned long dt_ptr) +notrace void __init machine_init(u64 dt_ptr) { lockdep_init(); -- cgit v1.2.3 From 0330581ab3b9002d55ee66f377ccbbb742175c01 Mon Sep 17 00:00:00 2001 From: Tang Yuantian Date: Tue, 16 Aug 2011 19:51:33 +0000 Subject: powerpc/mm: Fix the call trace when resumed from hibernation In SMP mode, the kernel would produce call trace when resumed from hibernation. The reason is when the function destroy_context is called to drop the resuming mm context, the mm->context.active is 1 which is wrong and should be zero. We pass the current->active_mm as previous mm context to function switch_mmu_context to decrease the context.active by 1. In UP mode, there is no effect. Signed-off-by: Tang Yuantian Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/swsusp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c index aa17b76dd42..641f9adc620 100644 --- a/arch/powerpc/kernel/swsusp.c +++ b/arch/powerpc/kernel/swsusp.c @@ -33,6 +33,6 @@ void save_processor_state(void) void restore_processor_state(void) { #ifdef CONFIG_PPC32 - switch_mmu_context(NULL, current->active_mm); + switch_mmu_context(current->active_mm, current->active_mm); #endif } -- cgit v1.2.3 From c26afe9e8591f306d79aab8071f1d34e4f60b700 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Wed, 31 Aug 2011 06:32:26 +0000 Subject: powerpc/ps3: Add gelic udbg driver Add a new udbg driver for the PS3 gelic Ehthernet device. This driver shares only a few stucture and constant definitions with the gelic Ethernet device driver, so is implemented as a stand-alone driver with no dependencies on the gelic Ethernet device driver. Signed-off-by: Hector Martin Signed-off-by: Andre Heider Signed-off-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/udbg.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index faa82c1f3f6..5b3e98e0315 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -67,6 +67,8 @@ void __init udbg_early_init(void) udbg_init_usbgecko(); #elif defined(CONFIG_PPC_EARLY_DEBUG_WSP) udbg_init_wsp(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) + udbg_init_ps3gelic(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG -- cgit v1.2.3 From 94db7c5e14f44b943febe54e089d077cd983d284 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:22 +0000 Subject: powerpc: Use for_each_node_by_type instead of open coding it Use for_each_node_by_type instead of open coding it. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec_64.c | 3 +-- arch/powerpc/kernel/setup_64.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c index 583af70c4b1..26ccbf77dd4 100644 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ b/arch/powerpc/kernel/machine_kexec_64.c @@ -74,8 +74,7 @@ int default_machine_kexec_prepare(struct kimage *image) } /* We also should not overwrite the tce tables */ - for (node = of_find_node_by_type(NULL, "pci"); node != NULL; - node = of_find_node_by_type(node, "pci")) { + for_each_node_by_type(node, "pci") { basep = of_get_property(node, "linux,tce-base", NULL); sizep = of_get_property(node, "linux,tce-size", NULL); if (basep == NULL || sizep == NULL) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index aebef1320ed..eade1fd8ee2 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -278,7 +278,7 @@ static void __init initialize_cache_info(void) DBG(" -> initialize_cache_info()\n"); - for (np = NULL; (np = of_find_node_by_type(np, "cpu"));) { + for_each_node_by_type(np, "cpu") { num_cpus += 1; /* We're assuming *all* of the CPUs have the same -- cgit v1.2.3 From dfbe93a222e74b6f96ad84eff2b04a0f864fac65 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 10 Aug 2011 20:44:23 +0000 Subject: powerpc: Coding style cleanups While converting code to use for_each_node_by_type I noticed a number of coding style issues. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/setup_64.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index eade1fd8ee2..d4168c93098 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -281,11 +281,11 @@ static void __init initialize_cache_info(void) for_each_node_by_type(np, "cpu") { num_cpus += 1; - /* We're assuming *all* of the CPUs have the same + /* + * We're assuming *all* of the CPUs have the same * d-cache and i-cache sizes... -Peter */ - - if ( num_cpus == 1 ) { + if (num_cpus == 1) { const u32 *sizep, *lsizep; u32 size, lsize; @@ -294,10 +294,13 @@ static void __init initialize_cache_info(void) sizep = of_get_property(np, "d-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = of_get_property(np, "d-cache-block-size", NULL); + lsizep = of_get_property(np, "d-cache-block-size", + NULL); /* fallback if block size missing */ if (lsizep == NULL) - lsizep = of_get_property(np, "d-cache-line-size", NULL); + lsizep = of_get_property(np, + "d-cache-line-size", + NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) @@ -314,9 +317,12 @@ static void __init initialize_cache_info(void) sizep = of_get_property(np, "i-cache-size", NULL); if (sizep != NULL) size = *sizep; - lsizep = of_get_property(np, "i-cache-block-size", NULL); + lsizep = of_get_property(np, "i-cache-block-size", + NULL); if (lsizep == NULL) - lsizep = of_get_property(np, "i-cache-line-size", NULL); + lsizep = of_get_property(np, + "i-cache-line-size", + NULL); if (lsizep != NULL) lsize = *lsizep; if (sizep == 0 || lsizep == 0) -- cgit v1.2.3 From fb82b83970a32263698e54a8779d2ce88cd3b060 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:49 +0000 Subject: powerpc/smp: More generic support for "soft hotplug" This adds more generic support for doing CPU hotplug with a simple idle loop and no actual reset of the processors. The generic smp_generic_kick_cpu() does the hotplug bringup trick if the PACA shows that the CPU has already been started at boot and we provide an accessor for the CPU state. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/smp.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 7bf2187dfd9..af7e7722eca 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -70,6 +70,10 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) + +/* State of each CPU during hotplug phases */ +static DEFINE_PER_CPU(int, cpu_state) = { 0 }; + #else static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) @@ -104,12 +108,25 @@ int __devinit smp_generic_kick_cpu(int nr) * cpu_start field to become non-zero After we set cpu_start, * the processor will continue on to secondary_start */ - paca[nr].cpu_start = 1; - smp_mb(); + if (!paca[nr].cpu_start) { + paca[nr].cpu_start = 1; + smp_mb(); + return 0; + } + +#ifdef CONFIG_HOTPLUG_CPU + /* + * Ok it's not there, so it might be soft-unplugged, let's + * try to bring it back + */ + per_cpu(cpu_state, nr) = CPU_UP_PREPARE; + smp_wmb(); + smp_send_reschedule(nr); +#endif /* CONFIG_HOTPLUG_CPU */ return 0; } -#endif +#endif /* CONFIG_PPC64 */ static irqreturn_t call_function_action(int irq, void *data) { @@ -357,8 +374,6 @@ void __devinit smp_prepare_boot_cpu(void) } #ifdef CONFIG_HOTPLUG_CPU -/* State of each CPU during hotplug phases */ -static DEFINE_PER_CPU(int, cpu_state) = { 0 }; int generic_cpu_disable(void) { @@ -406,6 +421,11 @@ void generic_set_cpu_dead(unsigned int cpu) { per_cpu(cpu_state, cpu) = CPU_DEAD; } + +int generic_check_cpu_restart(unsigned int cpu) +{ + return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE; +} #endif struct create_idle { -- cgit v1.2.3 From 781fb7a3e4cdca28236ae23e2c77070ed3ae531f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:50 +0000 Subject: powerpc/pci: Call pcie_bus_configure_settings() This new function is used to properly setup the PCI Express Max Payload Size (and in some circumstances Max Read Request Size). Some systems will not operate properly if these aren't set correctly and the firmware doesn't always do it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 32656f10525..1bd47f36b25 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1730,6 +1730,17 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) if (mode == PCI_PROBE_NORMAL) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); + + /* Configure PCI Express settings */ + if (bus) { + struct pci_bus *child; + list_for_each_entry(child, &bus->children, node) { + struct pci_dev *self = child->self; + if (!self) + continue; + pcie_bus_configure_settings(child, self->pcie_mpss); + } + } } static void fixup_hide_host_resource_fsl(struct pci_dev *dev) -- cgit v1.2.3 From e550592e689cf8d682937f356497f989f3d88292 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:51 +0000 Subject: powerpc/powernv: Don't clobber r9 in relative_toc() With OPAL, r8 and r9 will be used to pass the OPAL base and entry for debugging purposes (those informations are also in the device-tree). We don't want to clobber those registers that early. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 3564c49c683..e708abe576d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -674,9 +674,9 @@ _GLOBAL(enable_64b_mode) _GLOBAL(relative_toc) mflr r0 bcl 20,31,$+4 -0: mflr r9 - ld r2,(p_toc - 0b)(r9) - add r2,r2,r9 +0: mflr r11 + ld r2,(p_toc - 0b)(r11) + add r2,r2,r11 mtlr r0 blr -- cgit v1.2.3 From 27f4488872d9ef2a4b9aa2be58fb0789d6c0ba84 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 18:27:58 +0000 Subject: powerpc/powernv: Add OPAL takeover from PowerVM On machines supporting the OPAL firmware version 1, the system is initially booted under pHyp. We then use a special hypercall to verify if OPAL is available and if it is, we then trigger a "takeover" which disables pHyp and loads the OPAL runtime firmware, giving control to the kernel in hypervisor mode. This patch add the necessary code to detect that the OPAL takeover capability is present when running under PowerVM (aka pHyp) and perform said takeover to get hypervisor control of the processor. To perform the takeover, we must first use RTAS (within Open Firmware runtime environment) to start all processors & threads, in order to give control to OPAL on all of them. We then call the takeover hypercall on everybody, OPAL will re-enter the kernel main entry point passing it a flat device-tree. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 4 + arch/powerpc/kernel/prom_init.c | 239 ++++++++++++++++++++++++++++++--- arch/powerpc/kernel/prom_init_check.sh | 3 +- 3 files changed, 228 insertions(+), 18 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index e708abe576d..dea8191253d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -51,6 +51,10 @@ * For pSeries or server processors: * 1. The MMU is off & open firmware is running in real mode. * 2. The kernel is entered at __start + * -or- For OPAL entry: + * 1. The MMU is off, processor in HV mode, primary CPU enters at 0 + * with device-tree in gpr3 + * 2. Secondary processors enter at 0x60 with PIR in gpr3 * * For iSeries: * 1. The MMU is on (as it always is for iSeries) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a909f4e9343..9369287aa8c 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -43,6 +43,7 @@ #include #include #include +#include #include @@ -185,6 +186,7 @@ static unsigned long __initdata prom_tce_alloc_end; #define PLATFORM_LPAR 0x0001 #define PLATFORM_POWERMAC 0x0400 #define PLATFORM_GENERIC 0x0500 +#define PLATFORM_OPAL 0x0600 static int __initdata of_platform; @@ -644,7 +646,7 @@ static void __init early_cmdline_parse(void) } } -#ifdef CONFIG_PPC_PSERIES +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * There are two methods for telling firmware what our capabilities are. * Newer machines have an "ibm,client-architecture-support" method on the @@ -1274,6 +1276,195 @@ static void __init prom_init_mem(void) prom_printf(" ram_top : %x\n", RELOC(ram_top)); } +static void __init prom_close_stdin(void) +{ + struct prom_t *_prom = &RELOC(prom); + ihandle val; + + if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) + call_prom("close", 1, 0, val); +} + +#ifdef CONFIG_PPC_POWERNV + +static u64 __initdata prom_opal_size; +static u64 __initdata prom_opal_align; +static int __initdata prom_rtas_start_cpu; +static u64 __initdata prom_rtas_data; +static u64 __initdata prom_rtas_entry; + +/* XXX Don't change this structure without updating opal-takeover.S */ +static struct opal_secondary_data { + s64 ack; /* 0 */ + u64 go; /* 8 */ + struct opal_takeover_args args; /* 16 */ +} opal_secondary_data; + +extern char opal_secondary_entry; + +static void prom_query_opal(void) +{ + long rc; + + prom_printf("Querying for OPAL presence... "); + rc = opal_query_takeover(&RELOC(prom_opal_size), + &RELOC(prom_opal_align)); + prom_debug("(rc = %ld) ", rc); + if (rc != 0) { + prom_printf("not there.\n"); + return; + } + RELOC(of_platform) = PLATFORM_OPAL; + prom_printf(" there !\n"); + prom_debug(" opal_size = 0x%lx\n", RELOC(prom_opal_size)); + prom_debug(" opal_align = 0x%lx\n", RELOC(prom_opal_align)); + if (RELOC(prom_opal_align) < 0x10000) + RELOC(prom_opal_align) = 0x10000; +} + +static int prom_rtas_call(int token, int nargs, int nret, int *outputs, ...) +{ + struct rtas_args rtas_args; + va_list list; + int i; + + rtas_args.token = token; + rtas_args.nargs = nargs; + rtas_args.nret = nret; + rtas_args.rets = (rtas_arg_t *)&(rtas_args.args[nargs]); + va_start(list, outputs); + for (i = 0; i < nargs; ++i) + rtas_args.args[i] = va_arg(list, rtas_arg_t); + va_end(list); + + for (i = 0; i < nret; ++i) + rtas_args.rets[i] = 0; + + opal_enter_rtas(&rtas_args, RELOC(prom_rtas_data), + RELOC(prom_rtas_entry)); + + if (nret > 1 && outputs != NULL) + for (i = 0; i < nret-1; ++i) + outputs[i] = rtas_args.rets[i+1]; + return (nret > 0)? rtas_args.rets[0]: 0; +} + +static void __init prom_opal_hold_cpus(void) +{ + int i, cnt, cpu, rc; + long j; + phandle node; + char type[64]; + u32 servers[8]; + struct prom_t *_prom = &RELOC(prom); + void *entry = (unsigned long *)&RELOC(opal_secondary_entry); + struct opal_secondary_data *data = &RELOC(opal_secondary_data); + + prom_debug("prom_opal_hold_cpus: start...\n"); + prom_debug(" - entry = 0x%x\n", entry); + prom_debug(" - data = 0x%x\n", data); + + data->ack = -1; + data->go = 0; + + /* look for cpus */ + for (node = 0; prom_next_node(&node); ) { + type[0] = 0; + prom_getprop(node, "device_type", type, sizeof(type)); + if (strcmp(type, RELOC("cpu")) != 0) + continue; + + /* Skip non-configured cpus. */ + if (prom_getprop(node, "status", type, sizeof(type)) > 0) + if (strcmp(type, RELOC("okay")) != 0) + continue; + + cnt = prom_getprop(node, "ibm,ppc-interrupt-server#s", servers, + sizeof(servers)); + if (cnt == PROM_ERROR) + break; + cnt >>= 2; + for (i = 0; i < cnt; i++) { + cpu = servers[i]; + prom_debug("CPU %d ... ", cpu); + if (cpu == _prom->cpu) { + prom_debug("booted !\n"); + continue; + } + prom_debug("starting ... "); + + /* Init the acknowledge var which will be reset by + * the secondary cpu when it awakens from its OF + * spinloop. + */ + data->ack = -1; + rc = prom_rtas_call(RELOC(prom_rtas_start_cpu), 3, 1, + NULL, cpu, entry, data); + prom_debug("rtas rc=%d ...", rc); + + for (j = 0; j < 100000000 && data->ack == -1; j++) { + HMT_low(); + mb(); + } + HMT_medium(); + if (data->ack != -1) + prom_debug("done, PIR=0x%x\n", data->ack); + else + prom_debug("timeout !\n"); + } + } + prom_debug("prom_opal_hold_cpus: end...\n"); +} + +static void prom_opal_takeover(void) +{ + struct opal_secondary_data *data = &RELOC(opal_secondary_data); + struct opal_takeover_args *args = &data->args; + u64 align = RELOC(prom_opal_align); + u64 top_addr, opal_addr; + + args->k_image = (u64)RELOC(_stext); + args->k_size = _end - _stext; + args->k_entry = 0; + args->k_entry2 = 0x60; + + top_addr = _ALIGN_UP(args->k_size, align); + + if (RELOC(prom_initrd_start) != 0) { + args->rd_image = RELOC(prom_initrd_start); + args->rd_size = RELOC(prom_initrd_end) - args->rd_image; + args->rd_loc = top_addr; + top_addr = _ALIGN_UP(args->rd_loc + args->rd_size, align); + } + + /* Pickup an address for the HAL. We want to go really high + * up to avoid problem with future kexecs. On the other hand + * we don't want to be all over the TCEs on P5IOC2 machines + * which are going to be up there too. We assume the machine + * has plenty of memory, and we ask for the HAL for now to + * be just below the 1G point, or above the initrd + */ + opal_addr = _ALIGN_DOWN(0x40000000 - RELOC(prom_opal_size), align); + if (opal_addr < top_addr) + opal_addr = top_addr; + args->hal_addr = opal_addr; + + prom_debug(" k_image = 0x%lx\n", args->k_image); + prom_debug(" k_size = 0x%lx\n", args->k_size); + prom_debug(" k_entry = 0x%lx\n", args->k_entry); + prom_debug(" k_entry2 = 0x%lx\n", args->k_entry2); + prom_debug(" hal_addr = 0x%lx\n", args->hal_addr); + prom_debug(" rd_image = 0x%lx\n", args->rd_image); + prom_debug(" rd_size = 0x%lx\n", args->rd_size); + prom_debug(" rd_loc = 0x%lx\n", args->rd_loc); + prom_printf("Performing OPAL takeover,this can take a few minutes..\n"); + prom_close_stdin(); + mb(); + data->go = 1; + for (;;) + opal_do_takeover(args); +} +#endif /* CONFIG_PPC_POWERNV */ /* * Allocate room for and instantiate RTAS @@ -1326,6 +1517,12 @@ static void __init prom_instantiate_rtas(void) prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", &entry, sizeof(entry)); +#ifdef CONFIG_PPC_POWERNV + /* PowerVN takeover hack */ + RELOC(prom_rtas_data) = base; + RELOC(prom_rtas_entry) = entry; + prom_getprop(rtas_node, "start-cpu", &RELOC(prom_rtas_start_cpu), 4); +#endif prom_debug("rtas base = 0x%x\n", base); prom_debug("rtas entry = 0x%x\n", entry); prom_debug("rtas size = 0x%x\n", (long)size); @@ -1543,7 +1740,7 @@ static void __init prom_hold_cpus(void) *acknowledge = (unsigned long)-1; if (reg != _prom->cpu) { - /* Primary Thread of non-boot cpu */ + /* Primary Thread of non-boot cpu or any thread */ prom_printf("starting cpu hw idx %lu... ", reg); call_prom("start-cpu", 3, 0, node, secondary_hold, reg); @@ -1652,15 +1849,6 @@ static void __init prom_init_stdout(void) prom_setprop(val, path, "linux,boot-display", NULL, 0); } -static void __init prom_close_stdin(void) -{ - struct prom_t *_prom = &RELOC(prom); - ihandle val; - - if (prom_getprop(_prom->chosen, "stdin", &val, sizeof(val)) > 0) - call_prom("close", 1, 0, val); -} - static int __init prom_find_machine_type(void) { struct prom_t *_prom = &RELOC(prom); @@ -2504,6 +2692,7 @@ static void __init prom_check_initrd(unsigned long r3, unsigned long r4) #endif /* CONFIG_BLK_DEV_INITRD */ } + /* * We enter here early on, when the Open Firmware prom is still * handling exceptions and the MMU hash table for us. @@ -2565,7 +2754,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); -#ifdef CONFIG_PPC_PSERIES +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities */ @@ -2611,14 +2800,30 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, #endif /* - * On non-powermacs, try to instantiate RTAS and puts all CPUs - * in spin-loops. PowerMacs don't have a working RTAS and use - * a different way to spin CPUs + * On non-powermacs, try to instantiate RTAS. PowerMacs don't + * have a usable RTAS implementation. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) { + if (RELOC(of_platform) != PLATFORM_POWERMAC) prom_instantiate_rtas(); - prom_hold_cpus(); + +#ifdef CONFIG_PPC_POWERNV + /* Detect HAL and try instanciating it & doing takeover */ + if (RELOC(of_platform) == PLATFORM_PSERIES_LPAR) { + prom_query_opal(); + if (RELOC(of_platform) == PLATFORM_OPAL) { + prom_opal_hold_cpus(); + prom_opal_takeover(); + } } +#endif + + /* + * On non-powermacs, put all CPUs in spin-loops. + * + * PowerMacs use a different mechanism to spin CPUs + */ + if (RELOC(of_platform) != PLATFORM_POWERMAC) + prom_hold_cpus(); /* * Fill in some infos for use by the kernel later on diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 9f82f493789..20af6aada51 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -20,7 +20,8 @@ WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 -reloc_got2 kernstart_addr memstart_addr linux_banner" +reloc_got2 kernstart_addr memstart_addr linux_banner _stext +opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry" NM="$1" OBJ="$2" -- cgit v1.2.3 From 817c21ad9a1f00926f080265493923ada3458c63 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:56 +0000 Subject: powerpc/powernv: Get kernel command line accross OPAL takeover We stash it in boot_command_line which isn't in BSS and so won't be overwritten. We then use that as a default cmd_line before we walk the device-tree. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 7 +++++++ arch/powerpc/kernel/prom_init.c | 4 ++++ arch/powerpc/kernel/prom_init_check.sh | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 174e1e96175..7b90c564e93 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -712,6 +712,13 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); #endif + /* Pre-initialize the cmd_line with the content of boot_commmand_line, + * which will be empty except when the content of the variable has + * been overriden by a bootloading mechanism. This happens typically + * with HAL takeover + */ + strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); + /* Retrieve various informations from the /chosen node of the * device-tree, including the platform type, initrd location and * size, TCE reserve, and more ... diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 9369287aa8c..e3f39042721 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1449,6 +1449,10 @@ static void prom_opal_takeover(void) opal_addr = top_addr; args->hal_addr = opal_addr; + /* Copy the command line to the kernel image */ + strlcpy(RELOC(boot_command_line), RELOC(prom_cmd_line), + COMMAND_LINE_SIZE); + prom_debug(" k_image = 0x%lx\n", args->k_image); prom_debug(" k_size = 0x%lx\n", args->k_size); prom_debug(" k_entry = 0x%lx\n", args->k_entry); diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh index 20af6aada51..70f4286eaa7 100644 --- a/arch/powerpc/kernel/prom_init_check.sh +++ b/arch/powerpc/kernel/prom_init_check.sh @@ -21,7 +21,8 @@ _end enter_prom memcpy memset reloc_offset __secondary_hold __secondary_hold_acknowledge __secondary_hold_spinloop __start strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 reloc_got2 kernstart_addr memstart_addr linux_banner _stext -opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry" +opal_query_takeover opal_do_takeover opal_enter_rtas opal_secondary_entry +boot_command_line" NM="$1" OBJ="$2" -- cgit v1.2.3 From 14a43e69ed257a1fadadf9fea2c05adb1686419f Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:57 +0000 Subject: powerpc/powernv: Basic support for OPAL Add definition of OPAL interfaces along with the wrappers to call into OPAL runtime and the early device-tree parsing hook to locate the OPAL runtime firmware. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 7b90c564e93..831a201e03d 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -54,6 +54,8 @@ #include #include #include +#include + #include #ifdef DEBUG @@ -707,6 +709,11 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_rtas, NULL); #endif +#ifdef CONFIG_PPC_POWERNV + /* Some machines might need OPAL info for debugging, grab it now. */ + of_scan_flat_dt(early_init_dt_scan_opal, NULL); +#endif + #ifdef CONFIG_PHYP_DUMP /* scan tree to see if dump occurred during last boot */ of_scan_flat_dt(early_init_dt_scan_phyp_dump, NULL); -- cgit v1.2.3 From 6e35d5dac0c83ebb616ff3b9c2d6155c9a9ccb86 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 18:28:01 +0000 Subject: powerpc/powernv: Add support for instanciating OPAL v2 from Open Firmware OPAL v2 is instantiated in a way similar to RTAS using Open Firmware client interface calls, and the resulting address and entry point are put in the device-tree Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 136 ++++++++++++++++++++++++++++++++++------ 1 file changed, 117 insertions(+), 19 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e3f39042721..e96f5d0d2c7 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -140,7 +140,9 @@ struct mem_map_entry { typedef u32 cell_t; -extern void __start(unsigned long r3, unsigned long r4, unsigned long r5); +extern void __start(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, unsigned long r8, + unsigned long r9); #ifdef CONFIG_PPC64 extern int enter_prom(struct prom_args *args, unsigned long entry); @@ -1293,6 +1295,11 @@ static int __initdata prom_rtas_start_cpu; static u64 __initdata prom_rtas_data; static u64 __initdata prom_rtas_entry; +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL +static u64 __initdata prom_opal_base; +static u64 __initdata prom_opal_entry; +#endif + /* XXX Don't change this structure without updating opal-takeover.S */ static struct opal_secondary_data { s64 ack; /* 0 */ @@ -1468,6 +1475,76 @@ static void prom_opal_takeover(void) for (;;) opal_do_takeover(args); } + +/* + * Allocate room for and instantiate OPAL + */ +static void __init prom_instantiate_opal(void) +{ + phandle opal_node; + ihandle opal_inst; + u64 base, entry; + u64 size = 0, align = 0x10000; + u32 rets[2]; + + prom_debug("prom_instantiate_opal: start...\n"); + + opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal")); + prom_debug("opal_node: %x\n", opal_node); + if (!PHANDLE_VALID(opal_node)) + return; + + prom_getprop(opal_node, "opal-runtime-size", &size, sizeof(size)); + if (size == 0) + return; + prom_getprop(opal_node, "opal-runtime-alignment", &align, + sizeof(align)); + + base = alloc_down(size, align, 0); + if (base == 0) { + prom_printf("OPAL allocation failed !\n"); + return; + } + + opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal")); + if (!IHANDLE_VALID(opal_inst)) { + prom_printf("opening opal package failed (%x)\n", opal_inst); + return; + } + + prom_printf("instantiating opal at 0x%x...", base); + + if (call_prom_ret("call-method", 4, 3, rets, + ADDR("load-opal-runtime"), + opal_inst, + base >> 32, base & 0xffffffff) != 0 + || (rets[0] == 0 && rets[1] == 0)) { + prom_printf(" failed\n"); + return; + } + entry = (((u64)rets[0]) << 32) | rets[1]; + + prom_printf(" done\n"); + + reserve_mem(base, size); + + prom_debug("opal base = 0x%x\n", base); + prom_debug("opal align = 0x%x\n", align); + prom_debug("opal entry = 0x%x\n", entry); + prom_debug("opal size = 0x%x\n", (long)size); + + prom_setprop(opal_node, "/ibm,opal", "opal-base-address", + &base, sizeof(base)); + prom_setprop(opal_node, "/ibm,opal", "opal-entry-address", + &entry, sizeof(entry)); + +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + RELOC(prom_opal_base) = base; + RELOC(prom_opal_entry) = entry; +#endif + prom_debug("prom_instantiate_opal: end...\n"); +} + #endif /* CONFIG_PPC_POWERNV */ /* @@ -1863,7 +1940,7 @@ static int __init prom_find_machine_type(void) int x; #endif - /* Look for a PowerMac */ + /* Look for a PowerMac or a Cell */ len = prom_getprop(_prom->root, "compatible", compat, sizeof(compat)-1); if (len > 0) { @@ -1889,7 +1966,11 @@ static int __init prom_find_machine_type(void) } } #ifdef CONFIG_PPC64 - /* If not a mac, try to figure out if it's an IBM pSeries or any other + /* Try to detect OPAL */ + if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal")))) + return PLATFORM_OPAL; + + /* Try to figure out if it's an IBM pSeries or any other * PAPR compliant platform. We assume it is if : * - /device_type is "chrp" (please, do NOT use that for future * non-IBM designs ! @@ -2116,7 +2197,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long soff; unsigned char *valp; static char pname[MAX_PROPERTY_NAME]; - int l, room; + int l, room, has_phandle = 0; dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); @@ -2200,19 +2281,26 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, valp = make_room(mem_start, mem_end, l, 4); call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); *mem_start = _ALIGN(*mem_start, 4); + + if (!strcmp(RELOC(pname), RELOC("phandle"))) + has_phandle = 1; } - /* Add a "linux,phandle" property. */ - soff = dt_find_string(RELOC("linux,phandle")); - if (soff == 0) - prom_printf("WARNING: Can't find string index for" - " node %s\n", path); - else { - dt_push_token(OF_DT_PROP, mem_start, mem_end); - dt_push_token(4, mem_start, mem_end); - dt_push_token(soff, mem_start, mem_end); - valp = make_room(mem_start, mem_end, 4, 4); - *(u32 *)valp = node; + /* Add a "linux,phandle" property if no "phandle" property already + * existed (can happen with OPAL) + */ + if (!has_phandle) { + soff = dt_find_string(RELOC("linux,phandle")); + if (soff == 0) + prom_printf("WARNING: Can't find string index for" + " node %s\n", path); + else { + dt_push_token(OF_DT_PROP, mem_start, mem_end); + dt_push_token(4, mem_start, mem_end); + dt_push_token(soff, mem_start, mem_end); + valp = make_room(mem_start, mem_end, 4, 4); + *(u32 *)valp = node; + } } /* do all our children */ @@ -2746,6 +2834,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * between pSeries SMP and pSeries LPAR */ RELOC(of_platform) = prom_find_machine_type(); + prom_printf("Detected machine type: %x\n", RELOC(of_platform)); #ifndef CONFIG_RELOCATABLE /* Bail if this is a kdump kernel. */ @@ -2807,7 +2896,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * On non-powermacs, try to instantiate RTAS. PowerMacs don't * have a usable RTAS implementation. */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_instantiate_rtas(); #ifdef CONFIG_PPC_POWERNV @@ -2818,7 +2908,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, prom_opal_hold_cpus(); prom_opal_takeover(); } - } + } else if (RELOC(of_platform) == PLATFORM_OPAL) + prom_instantiate_opal(); #endif /* @@ -2826,7 +2917,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, * * PowerMacs use a different mechanism to spin CPUs */ - if (RELOC(of_platform) != PLATFORM_POWERMAC) + if (RELOC(of_platform) != PLATFORM_POWERMAC && + RELOC(of_platform) != PLATFORM_OPAL) prom_hold_cpus(); /* @@ -2894,7 +2986,13 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, reloc_got2(-offset); #endif - __start(hdr, kbase, 0); +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* OPAL early debug gets the OPAL base & entry in r8 and r9 */ + __start(hdr, kbase, 0, 0, 0, + RELOC(prom_opal_base), RELOC(prom_opal_entry)); +#else + __start(hdr, kbase, 0, 0, 0, 0, 0); +#endif return 0; } -- cgit v1.2.3 From daea1175a9f0f70eab5b33e2827d57ba8c686816 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:44:59 +0000 Subject: powerpc/powernv: Support for OPAL console This adds a udbg and an hvc console backend for supporting a console using the OPAL console interfaces. On OPAL v1 we have hvc0 mapped to whatever console the system was configured for (network or hvsi serial port) via the service processor. On OPAL v2 we have hvcN mapped to the Nth console provided by OPAL which generally corresponds to: hvc0 : network console (raw protocol) hvc1 : serial port S1 (hvsi) hvc2 : serial port S2 (hvsi) Note: At this point, early debug console only works with OPAL v1 and shouldn't be enabled in a normal kernel. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 14 +++++++++++++- arch/powerpc/kernel/udbg.c | 4 ++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index dea8191253d..06c7251c1bf 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -53,7 +53,8 @@ * 2. The kernel is entered at __start * -or- For OPAL entry: * 1. The MMU is off, processor in HV mode, primary CPU enters at 0 - * with device-tree in gpr3 + * with device-tree in gpr3. We also get OPAL base in r8 and + * entry in r9 for debugging purposes * 2. Secondary processors enter at 0x60 with PIR in gpr3 * * For iSeries: @@ -335,6 +336,11 @@ _GLOBAL(__start_initialization_multiplatform) /* Save parameters */ mr r31,r3 mr r30,r4 +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* Save OPAL entry */ + mr r28,r8 + mr r29,r9 +#endif #ifdef CONFIG_PPC_BOOK3E bl .start_initialization_book3e @@ -711,6 +717,12 @@ _INIT_STATIC(start_here_multiplatform) bdnz 3b 4: +#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL + /* Setup OPAL entry */ + std r28,0(r11); + std r29,8(r11); +#endif + #ifndef CONFIG_PPC_BOOK3E mfmsr r6 ori r6,r6,MSR_RI diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 5b3e98e0315..35f948203ec 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -69,6 +69,10 @@ void __init udbg_early_init(void) udbg_init_wsp(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PS3GELIC) udbg_init_ps3gelic(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_RAW) + udbg_init_debug_opal_raw(); +#elif defined(CONFIG_PPC_EARLY_DEBUG_OPAL_HVSI) + udbg_init_debug_opal_hvsi(); #endif #ifdef CONFIG_PPC_EARLY_DEBUG -- cgit v1.2.3 From ed79ba9e15f84cef05aba5cbfe6e93f9b43c31f4 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 19 Sep 2011 17:45:04 +0000 Subject: powerpc/powernv: Machine check and other system interrupts OPAL can handle various interrupt for us such as Machine Checks (it performs all sorts of recovery tasks and passes back control to us with informations about the error), Hardware Management Interrupts and Softpatch interrupts. This wires up the mechanisms and prints out specific informations returned by HAL when a machine check occurs. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/asm-offsets.c | 10 ++++++++++ arch/powerpc/kernel/exceptions-64s.S | 27 +++++++++++++++++++++++---- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 5f078bc2063..536ffa897c6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -48,6 +48,9 @@ #ifdef CONFIG_PPC_ISERIES #include #endif +#ifdef CONFIG_PPC_POWERNV +#include +#endif #if defined(CONFIG_KVM) || defined(CONFIG_KVM_GUEST) #include #endif @@ -609,5 +612,12 @@ int main(void) arch.timing_last_enter.tv32.tbl)); #endif +#ifdef CONFIG_PPC_POWERNV + DEFINE(OPAL_MC_GPR3, offsetof(struct opal_machine_check_event, gpr3)); + DEFINE(OPAL_MC_SRR0, offsetof(struct opal_machine_check_event, srr0)); + DEFINE(OPAL_MC_SRR1, offsetof(struct opal_machine_check_event, srr1)); + DEFINE(PACA_OPAL_MC_EVT, offsetof(struct paca_struct, opal_mc_evt)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 41b02c792aa..d51458fa8de 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1143,7 +1143,7 @@ _GLOBAL(do_stab_bolted) rfid b . /* prevent speculative execution */ -#ifdef CONFIG_PPC_PSERIES +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * Data area reserved for FWNMI option. * This address (0x7000) is fixed by the RPA. @@ -1151,7 +1151,7 @@ _GLOBAL(do_stab_bolted) .= 0x7000 .globl fwnmi_data_area fwnmi_data_area: -#endif /* CONFIG_PPC_PSERIES */ +#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ /* iSeries does not use the FWNMI stuff, so it is safe to put * this here, even if we later allow kernels that will boot on @@ -1176,9 +1176,12 @@ xLparMap: #endif /* CONFIG_PPC_ISERIES */ -#ifdef CONFIG_PPC_PSERIES +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) + /* pseries and powernv need to keep the whole page from + * 0x7000 to 0x8000 free for use by the firmware + */ . = 0x8000 -#endif /* CONFIG_PPC_PSERIES */ +#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ /* * Space for CPU0's segment table. @@ -1193,3 +1196,19 @@ xLparMap: .globl initial_stab initial_stab: .space 4096 +#ifdef CONFIG_PPC_POWERNV +_GLOBAL(opal_mc_secondary_handler) + HMT_MEDIUM + SET_SCRATCH0(r13) + GET_PACA(r13) + clrldi r3,r3,2 + tovirt(r3,r3) + std r3,PACA_OPAL_MC_EVT(r13) + ld r13,OPAL_MC_SRR0(r3) + mtspr SPRN_SRR0,r13 + ld r13,OPAL_MC_SRR1(r3) + mtspr SPRN_SRR1,r13 + ld r3,OPAL_MC_GPR3(r3) + GET_SCRATCH0(r13) + b machine_check_pSeries +#endif /* CONFIG_PPC_POWERNV */ -- cgit v1.2.3 From a120db06c3f435c37d028b6e5a1968dad06b7df0 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Thu, 8 Sep 2011 21:12:06 +0000 Subject: perf events, powerpc: Add POWER7 stalled-cycles-frontend/backend events perf events, powerpc: Add POWER7 stalled-cycles-frontend/backend events Extent the POWER7 PMU driver with definitions for generic front-end and back-end stall events. As explained in Ingo's original comment(8f62242246351b5a4bc0c1f00c0c7003edea128a ), the exact definitions of the stall events are very much processor specific as different things mean different in their respective instruction pipeline. These two Power7 raw events are the closest approximation to the concept detailed in Ingo's comment. [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */ It means cycles when the Global Completion Table has no slots from this thread [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */ It means no groups completed and GCT not empty for this thread Signed-off-by: Anshuman Khandual Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/power7-pmu.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/power7-pmu.c b/arch/powerpc/kernel/power7-pmu.c index de83d6060dd..1251e4d7e26 100644 --- a/arch/powerpc/kernel/power7-pmu.c +++ b/arch/powerpc/kernel/power7-pmu.c @@ -297,6 +297,8 @@ static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[]) static int power7_generic_events[] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x100f8, /* GCT_NOSLOT_CYC */ + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x4000a, /* CMPLU_STALL */ [PERF_COUNT_HW_INSTRUCTIONS] = 2, [PERF_COUNT_HW_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU*/ [PERF_COUNT_HW_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ -- cgit v1.2.3 From d12b524f8b2f4e45cabe8bc1501e8b967d543111 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Tue, 20 Sep 2011 03:07:24 +0000 Subject: powerpc: Reserve iommu page 0 Some devices have a dma-window that starts at the address 0. This allows DMA addresses to be mapped to this address and returned to drivers as a valid DMA address. Some drivers may not behave well in this case, since the address 0 is considered an error or not allocated. The solution to avoid this kind of error from happening is reserve the page addressed as 0 so it cannot be allocated for a DMA mapping. Signed-off-by: Thadeu Lima de Souza Cascardo Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/iommu.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 961bb03413f..0cfcf98aafc 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -501,6 +501,14 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid) tbl->it_map = page_address(page); memset(tbl->it_map, 0, sz); + /* + * Reserve page 0 so it will not be used for any mappings. + * This avoids buggy drivers that consider page 0 to be invalid + * to crash the machine or even lose data. + */ + if (tbl->it_offset == 0) + set_bit(0, tbl->it_map); + tbl->it_hint = 0; tbl->it_largehint = tbl->it_halfpoint; spin_lock_init(&tbl->it_lock); -- cgit v1.2.3 From bb36c44557a4fcbaa17c0f2776e12a05a691b432 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 26 Sep 2011 14:22:39 +1000 Subject: powerpc/pci: Don't configure PCIe settings when PCI_PROBE_ONLY is set We don't want to configure PCI Express Max Payload Size or Max Read Request Size on systems that set that flag. The firmware will have done it for us, and under hypervisors such as pHyp we don't even see the parent switches and bridges and thus can make no assumption on what values are safe to use. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 1bd47f36b25..677ecccbe10 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1732,7 +1732,7 @@ void __devinit pcibios_scan_phb(struct pci_controller *hose) hose->last_busno = bus->subordinate = pci_scan_child_bus(bus); /* Configure PCI Express settings */ - if (bus) { + if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { struct pci_bus *child; list_for_each_entry(child, &bus->children, node) { struct pci_dev *self = child->self; -- cgit v1.2.3 From e69b742a6793dc5bf16f6eedca534d4bc10d68b2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 26 Sep 2011 19:37:57 +0000 Subject: powerpc/ptrace: Fix build with gcc 4.6 gcc (rightfully) complains that we are accessing beyond the end of the fpr array (we do, to access the fpscr). The only sane thing to do (whether anything in that code can be called remotely sane is debatable) is to special case fpscr and handle it as a separate statement. I initially tried to do it it by making the array access conditional to index < PT_FPSCR and using a 3rd else leg but for some reason gcc was unable to understand it and still spewed the warning. So I ended up with something a tad more intricated but it seems to build on 32-bit and on 64-bit with and without VSX. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/ptrace.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 05b7dd217f6..18447c4fbad 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1497,9 +1497,14 @@ long arch_ptrace(struct task_struct *child, long request, if (index < PT_FPR0) { tmp = ptrace_get_reg(child, (int) index); } else { + unsigned int fpidx = index - PT_FPR0; + flush_fp_to_thread(child); - tmp = ((unsigned long *)child->thread.fpr) - [TS_FPRWIDTH * (index - PT_FPR0)]; + if (fpidx < (PT_FPSCR - PT_FPR0)) + tmp = ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH]; + else + tmp = child->thread.fpscr.val; } ret = put_user(tmp, datalp); break; @@ -1525,9 +1530,14 @@ long arch_ptrace(struct task_struct *child, long request, if (index < PT_FPR0) { ret = ptrace_put_reg(child, index, data); } else { + unsigned int fpidx = index - PT_FPR0; + flush_fp_to_thread(child); - ((unsigned long *)child->thread.fpr) - [TS_FPRWIDTH * (index - PT_FPR0)] = data; + if (fpidx < (PT_FPSCR - PT_FPR0)) + ((unsigned long *)child->thread.fpr) + [fpidx * TS_FPRWIDTH] = data; + else + child->thread.fpscr.val = data; ret = 0; } break; -- cgit v1.2.3 From d15f02eb4e8992cfacfca2ff306e5585bcf721d1 Mon Sep 17 00:00:00 2001 From: "Carl E. Love" Date: Wed, 28 Sep 2011 11:23:33 +0000 Subject: powerpc/perf_event: Fix Power6 L1 cache read & write event codes] The current L1 cache read event code 0x80082 only counts for thread 0. The event code 0x280030 should be used to count events on thread 0 and 1. The patch fixes the event code for the L1 cache read. The current L1 cache write event code 0x80086 only counts for thread 0. The event code 0x180032 should be used to count events on thread 0 and 1. The patch fixes the event code for the L1 cache write. FYI, the documentation lists three event codes for the L1 cache read event and three event codes for the L1 cache write event. The event description for the event codes is as follows: L1 cache read requests 0x80082 LSU 0 only L1 cache read requests 0x8008A LSU 1 only L1 cache read requests 0x80030 LSU 1 or LSU 0, counter 2 only. L1 cache store requests 0x80086 LSU 0 only L1 cache store requests 0x8008E LSU 1 only L1 cache store requests 0x80032 LSU 0 or LSU 1, counter 1 only. There can only be one request from either LSU 0 or 1 active at a time. Signed-off-by: Carl Love Acked-by: Paul Mackerras Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/power6-pmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/power6-pmu.c b/arch/powerpc/kernel/power6-pmu.c index 03b95e2c6d6..0bbc901e7ef 100644 --- a/arch/powerpc/kernel/power6-pmu.c +++ b/arch/powerpc/kernel/power6-pmu.c @@ -487,8 +487,8 @@ static int power6_generic_events[] = { */ static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x80082, 0x80080 }, - [C(OP_WRITE)] = { 0x80086, 0x80088 }, + [C(OP_READ)] = { 0x280030, 0x80080 }, + [C(OP_WRITE)] = { 0x180032, 0x80088 }, [C(OP_PREFETCH)] = { 0x810a4, 0 }, }, [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ -- cgit v1.2.3 From 7680057cc4c7d9caada12767831bfd9738dd7b43 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 28 Sep 2011 20:51:46 +0000 Subject: powerpc: Don't try OPAL takeover on old 970 blades The firmware on old 970 blades supports some kind of takeover called "TNK takeover" which will crash if we try to probe for OPAL takeover, so don't do it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom_init.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e96f5d0d2c7..b4fa6612749 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1313,6 +1313,16 @@ static void prom_query_opal(void) { long rc; + /* We must not query for OPAL presence on a machine that + * supports TNK takeover (970 blades), as this uses the same + * h-call with different arguments and will crash + */ + if (PHANDLE_VALID(call_prom("finddevice", 1, 1, + ADDR("/tnk-memory-map")))) { + prom_printf("TNK takeover detected, skipping OPAL check\n"); + return; + } + prom_printf("Querying for OPAL presence... "); rc = opal_query_takeover(&RELOC(prom_opal_size), &RELOC(prom_opal_align)); -- cgit v1.2.3 From 37caf9f2a1b99d11ba71e17168d221da9ca13f24 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Sat, 27 Aug 2011 06:14:23 -0500 Subject: powerpc/fsl-booke: Handle L1 D-cache parity error correctly on e500mc If the L1 D-Cache is in write shadow mode the HW will auto-recover the error. However we might still log the error and cause a machine check (if L1CSR0[CPE] - Cache error checking enable). We should only treat the non-write shadow case as non-recoverable. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/traps.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f19d9777d3c..4e5908264d1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -457,7 +457,14 @@ int machine_check_e500mc(struct pt_regs *regs) if (reason & MCSR_DCPERR_MC) { printk("Data Cache Parity Error\n"); - recoverable = 0; + + /* + * In write shadow mode we auto-recover from the error, but it + * may still get logged and cause a machine check. We should + * only treat the non-write shadow case as non-recoverable. + */ + if (!(mfspr(SPRN_L1CSR2) & L1CSR2_DCWS)) + recoverable = 0; } if (reason & MCSR_L2MMU_MHIT) { -- cgit v1.2.3 From e33ee8b6f473b33a41c281f5329d9d7b3bed8ff5 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Tue, 11 Oct 2011 11:26:08 +0530 Subject: powerpc: e500mc: Fix: use CONFIG_PPC_E500MC in idle_e500.S It is wrongly using undefined CONFIG_E500MC. Signed-off-by: Bharat Bhushan Signed-off-by: Kumar Gala --- arch/powerpc/kernel/idle_e500.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/idle_e500.S b/arch/powerpc/kernel/idle_e500.S index 3e2b95c6ae6..4f0ab85f378 100644 --- a/arch/powerpc/kernel/idle_e500.S +++ b/arch/powerpc/kernel/idle_e500.S @@ -26,7 +26,7 @@ _GLOBAL(e500_idle) ori r4,r4,_TLF_NAPPING /* so when we take an exception */ stw r4,TI_LOCAL_FLAGS(r3) /* it will return to our caller */ -#ifdef CONFIG_E500MC +#ifdef CONFIG_PPC_E500MC wrteei 1 1: wait -- cgit v1.2.3 From ba14f6491768acad5cf50a3c7dc8927b7614d692 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Fri, 16 Sep 2011 10:39:58 -0500 Subject: powerpc: respect mem= setting for early memory limit setup For those MMUs that have some form of bolt'd linear mapping (TLB) required its rare that one ever sets mem= smaller than the size of that mapping. However, on Book-E 64 parts the initial linear mapping is quite large (1G) so its quite reasonable that mem= is set smaller than that. We need to parse the command line for mem= limit and constrain the amount of memory we map initially by it if need be. Signed-off-by: Kumar Gala --- arch/powerpc/kernel/prom.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 831a201e03d..8ad825c063c 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -737,12 +737,15 @@ void __init early_init_devtree(void *params) of_scan_flat_dt(early_init_dt_scan_root, NULL); of_scan_flat_dt(early_init_dt_scan_memory_ppc, NULL); - setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Save command line for /proc/cmdline and then parse parameters */ strlcpy(boot_command_line, cmd_line, COMMAND_LINE_SIZE); parse_early_param(); + /* make sure we've parsed cmdline for mem= before this */ + if (memory_limit) + first_memblock_size = min(first_memblock_size, memory_limit); + setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(klimit) - PHYSICAL_START); /* If relocatable, reserve first 32k for interrupt vectors etc. */ -- cgit v1.2.3 From 7d0d3ad5e3cfbf96aa4a2d6ee26a20c98a29d4a1 Mon Sep 17 00:00:00 2001 From: Matthew McClintock Date: Tue, 25 Oct 2011 17:54:03 -0500 Subject: powerpc/fsl_booke: Fix comment in head_fsl_booke.S Fix typo in comments introduced by: commit 6dece0eb69b2a28e18d104bc5d707f1cb673f5e0 Author: Scott Wood Date: Mon Jul 25 11:29:33 2011 +0000 powerpc/32: Pass device tree address as u64 to machine_init Signed-off-by: Matthew McClintock cc: Scott Wood Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/kernel') diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index e1c699f3b7a..9f5d210ddf3 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -80,8 +80,8 @@ _ENTRY(_start); slw r18,r18,r17 /* r18 = page size */ addi r18,r18,-1 and r19,r3,r18 /* r19 = page offset */ - andc r31,r20,r18 /* r3 = page base */ - or r31,r31,r19 /* r3 = devtree phys addr */ + andc r31,r20,r18 /* r31 = page base */ + or r31,r31,r19 /* r31 = devtree phys addr */ mfspr r30,SPRN_MAS7 li r25,0 /* phys kernel start (low) */ -- cgit v1.2.3