Upstream-Status: Pending Implements basic e5500 enablement in gcc, with a scheduler, -mcpu flag, etc... Also splits the masks for popcntb, popcntd, and cmpb. Originally those masks would also control other instructions that e5500 does not support (so, we either get none or all). For the lack of means to do tests, those instructions were never enabled until now. The new instructions enabled with this patch are: popcntb, popcntw, popcntd, bpermd, prtyw, prtyd, cmpb, ldbrx, and stdbrx. Signed-off-by: Edmar Wienskoski Signed-off-by: Kumar Gala Index: gcc-4_7-branch/gcc/config.gcc =================================================================== --- gcc-4_7-branch.orig/gcc/config.gcc 2012-04-10 10:32:36.823374222 -0700 +++ gcc-4_7-branch/gcc/config.gcc 2012-04-10 10:39:32.543394369 -0700 @@ -413,7 +413,7 @@ extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h" need_64bit_hwint=yes case x$with_cpu in - xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell|xa2|xe500mc64) + xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[34567]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500) cpu_is_64bit=yes ;; esac @@ -3342,7 +3342,7 @@ | 401 | 403 | 405 | 405fp | 440 | 440fp | 464 | 464fp \ | 476 | 476fp | 505 | 601 | 602 | 603 | 603e | ec603e \ | 604 | 604e | 620 | 630 | 740 | 750 | 7400 | 7450 \ - | a2 | e300c[23] | 854[08] | e500mc | e500mc64 | titan\ + | a2 | e300c[23] | 854[08] | e500mc | e500mc64 | e5500 | titan\ | 801 | 821 | 823 | 860 | 970 | G3 | G4 | G5 | cell) # OK ;; Index: gcc-4_7-branch/gcc/config/rs6000/e5500.md =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ gcc-4_7-branch/gcc/config/rs6000/e5500.md 2012-04-10 10:39:32.543394369 -0700 @@ -0,0 +1,176 @@ +;; Pipeline description for Freescale PowerPC e5500 core. +;; Copyright (C) 2011 Free Software Foundation, Inc. +;; Contributed by Edmar Wienskoski (edmar@freescale.com) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published +;; by the Free Software Foundation; either version 3, or (at your +;; option) any later version. +;; +;; GCC is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . +;; +;; e5500 64-bit SFX(2), CFX, LSU, FPU, BU +;; Max issue 3 insns/clock cycle (includes 1 branch) + +(define_automaton "e5500_most,e5500_long") +(define_cpu_unit "e5500_decode_0,e5500_decode_1" "e5500_most") + +;; SFX. +(define_cpu_unit "e5500_sfx_0,e5500_sfx_1" "e5500_most") + +;; CFX. +(define_cpu_unit "e5500_cfx_stage0,e5500_cfx_stage1" "e5500_most") + +;; Non-pipelined division. +(define_cpu_unit "e5500_cfx_div" "e5500_long") + +;; LSU. +(define_cpu_unit "e5500_lsu" "e5500_most") + +;; FPU. +(define_cpu_unit "e5500_fpu" "e5500_long") + +;; BU. +(define_cpu_unit "e5500_bu" "e5500_most") + +;; The following units are used to make the automata deterministic. +(define_cpu_unit "present_e5500_decode_0" "e5500_most") +(define_cpu_unit "present_e5500_sfx_0" "e5500_most") +(presence_set "present_e5500_decode_0" "e5500_decode_0") +(presence_set "present_e5500_sfx_0" "e5500_sfx_0") + +;; Some useful abbreviations. +(define_reservation "e5500_decode" + "e5500_decode_0|e5500_decode_1+present_e5500_decode_0") +(define_reservation "e5500_sfx" + "e5500_sfx_0|e5500_sfx_1+present_e5500_sfx_0") + +;; SFX. +(define_insn_reservation "e5500_sfx" 1 + (and (eq_attr "type" "integer,insert_word,insert_dword,delayed_compare,\ + shift,cntlz,exts") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +(define_insn_reservation "e5500_sfx2" 2 + (and (eq_attr "type" "cmp,compare,fast_compare,trap") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +(define_insn_reservation "e5500_delayed" 2 + (and (eq_attr "type" "var_shift_rotate,var_delayed_compare,popcnt") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx*2") + +(define_insn_reservation "e5500_two" 2 + (and (eq_attr "type" "two") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_decode+e5500_sfx,e5500_sfx") + +(define_insn_reservation "e5500_three" 3 + (and (eq_attr "type" "three") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,(e5500_decode+e5500_sfx)*2,e5500_sfx") + +;; SFX - Mfcr. +(define_insn_reservation "e5500_mfcr" 4 + (and (eq_attr "type" "mfcr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx_0*4") + +;; SFX - Mtcrf. +(define_insn_reservation "e5500_mtcrf" 1 + (and (eq_attr "type" "mtcr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx_0") + +;; SFX - Mtjmpr. +(define_insn_reservation "e5500_mtjmpr" 1 + (and (eq_attr "type" "mtjmpr,mfjmpr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_sfx") + +;; CFX - Multiply. +(define_insn_reservation "e5500_multiply" 4 + (and (eq_attr "type" "imul") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0,e5500_cfx_stage1") + +(define_insn_reservation "e5500_multiply_i" 5 + (and (eq_attr "type" "imul2,imul3,imul_compare") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0,\ + e5500_cfx_stage0+e5500_cfx_stage1,e5500_cfx_stage1") + +;; CFX - Divide. +(define_insn_reservation "e5500_divide" 16 + (and (eq_attr "type" "idiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0+e5500_cfx_div,\ + e5500_cfx_div*15") + +(define_insn_reservation "e5500_divide_d" 26 + (and (eq_attr "type" "ldiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_cfx_stage0+e5500_cfx_div,\ + e5500_cfx_div*25") + +;; LSU - Loads. +(define_insn_reservation "e5500_load" 3 + (and (eq_attr "type" "load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,\ + load_l,sync") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +(define_insn_reservation "e5500_fpload" 4 + (and (eq_attr "type" "fpload,fpload_ux,fpload_u") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +;; LSU - Stores. +(define_insn_reservation "e5500_store" 3 + (and (eq_attr "type" "store,store_ux,store_u,store_c") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +(define_insn_reservation "e5500_fpstore" 3 + (and (eq_attr "type" "fpstore,fpstore_ux,fpstore_u") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_lsu") + +;; FP. +(define_insn_reservation "e5500_float" 7 + (and (eq_attr "type" "fpsimple,fp,fpcompare,dmul") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu") + +(define_insn_reservation "e5500_sdiv" 20 + (and (eq_attr "type" "sdiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu*20") + +(define_insn_reservation "e5500_ddiv" 35 + (and (eq_attr "type" "ddiv") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_fpu*35") + +;; BU. +(define_insn_reservation "e5500_branch" 1 + (and (eq_attr "type" "jmpreg,branch,isync") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_bu") + +;; BU - CR logical. +(define_insn_reservation "e5500_cr_logical" 1 + (and (eq_attr "type" "cr_logical,delayed_cr") + (eq_attr "cpu" "ppce5500")) + "e5500_decode,e5500_bu") Index: gcc-4_7-branch/gcc/config/rs6000/rs6000-opts.h =================================================================== --- gcc-4_7-branch.orig/gcc/config/rs6000/rs6000-opts.h 2012-04-10 10:17:24.283330056 -0700 +++ gcc-4_7-branch/gcc/config/rs6000/rs6000-opts.h 2012-04-10 10:39:32.543394369 -0700 @@ -53,6 +53,7 @@ PROCESSOR_PPCE300C3, PROCESSOR_PPCE500MC, PROCESSOR_PPCE500MC64, + PROCESSOR_PPCE5500, PROCESSOR_POWER4, PROCESSOR_POWER5, PROCESSOR_POWER6, Index: gcc-4_7-branch/gcc/config/rs6000/rs6000.c =================================================================== --- gcc-4_7-branch.orig/gcc/config/rs6000/rs6000.c 2012-04-10 10:17:24.263330055 -0700 +++ gcc-4_7-branch/gcc/config/rs6000/rs6000.c 2012-04-10 10:39:32.547394389 -0700 @@ -755,6 +755,25 @@ 1, /* prefetch streams /*/ }; +/* Instruction costs on PPCE5500 processors. */ +static const +struct processor_costs ppce5500_cost = { + COSTS_N_INSNS (5), /* mulsi */ + COSTS_N_INSNS (5), /* mulsi_const */ + COSTS_N_INSNS (5), /* mulsi_const9 */ + COSTS_N_INSNS (5), /* muldi */ + COSTS_N_INSNS (14), /* divsi */ + COSTS_N_INSNS (14), /* divdi */ + COSTS_N_INSNS (7), /* fp */ + COSTS_N_INSNS (10), /* dmul */ + COSTS_N_INSNS (36), /* sdiv */ + COSTS_N_INSNS (66), /* ddiv */ + 64, /* cache line size */ + 32, /* l1 cache */ + 128, /* l2 cache */ + 1, /* prefetch streams /*/ +}; + /* Instruction costs on AppliedMicro Titan processors. */ static const struct processor_costs titan_cost = { @@ -2704,7 +2723,8 @@ rs6000_cpu = processor_target_table[tune_index].processor; if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 - || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64) + || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500) { if (TARGET_ALTIVEC) error ("AltiVec not supported in this target"); @@ -2805,9 +2825,14 @@ user's opinion, though. */ if (rs6000_block_move_inline_limit == 0 && (rs6000_cpu == PROCESSOR_PPCE500MC - || rs6000_cpu == PROCESSOR_PPCE500MC64)) + || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500)) rs6000_block_move_inline_limit = 128; + /* Those machines does not have fsqrt instruction */ + if (rs6000_cpu == PROCESSOR_PPCE5500) + target_flags &= ~MASK_PPC_GPOPT; + /* store_one_arg depends on expand_block_move to handle at least the size of reg_parm_stack_space. */ if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) @@ -2939,7 +2964,8 @@ #endif if (TARGET_E500 || rs6000_cpu == PROCESSOR_PPCE500MC - || rs6000_cpu == PROCESSOR_PPCE500MC64) + || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500) { /* The e500 and e500mc do not have string instructions, and we set MASK_STRING above when optimizing for size. */ @@ -2987,7 +3013,8 @@ || rs6000_cpu == PROCESSOR_POWER6 || rs6000_cpu == PROCESSOR_POWER7 || rs6000_cpu == PROCESSOR_PPCE500MC - || rs6000_cpu == PROCESSOR_PPCE500MC64); + || rs6000_cpu == PROCESSOR_PPCE500MC64 + || rs6000_cpu == PROCESSOR_PPCE5500); /* Allow debug switches to override the above settings. These are set to -1 in rs6000.opt to indicate the user hasn't directly set the switch. */ @@ -3209,6 +3236,10 @@ rs6000_cost = &ppce500mc64_cost; break; + case PROCESSOR_PPCE5500: + rs6000_cost = &ppce5500_cost; + break; + case PROCESSOR_TITAN: rs6000_cost = &titan_cost; break; @@ -22295,6 +22326,7 @@ || rs6000_cpu_attr == CPU_PPC750 || rs6000_cpu_attr == CPU_PPC7400 || rs6000_cpu_attr == CPU_PPC7450 + || rs6000_cpu_attr == CPU_PPCE5500 || rs6000_cpu_attr == CPU_POWER4 || rs6000_cpu_attr == CPU_POWER5 || rs6000_cpu_attr == CPU_POWER7 @@ -22839,6 +22871,7 @@ case CPU_PPCE300C3: case CPU_PPCE500MC: case CPU_PPCE500MC64: + case CPU_PPCE5500: case CPU_TITAN: return 2; case CPU_RIOS2: Index: gcc-4_7-branch/gcc/config/rs6000/rs6000.h =================================================================== --- gcc-4_7-branch.orig/gcc/config/rs6000/rs6000.h 2012-04-10 10:17:24.251330055 -0700 +++ gcc-4_7-branch/gcc/config/rs6000/rs6000.h 2012-04-10 10:39:32.551394375 -0700 @@ -168,6 +168,7 @@ %{mcpu=e300c3: -me300} \ %{mcpu=e500mc: -me500mc} \ %{mcpu=e500mc64: -me500mc64} \ +%{mcpu=e5500: -me5500} \ %{maltivec: -maltivec} \ %{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \ -many" @@ -475,13 +476,13 @@ #define TARGET_FCTIDZ TARGET_FCFID #define TARGET_STFIWX TARGET_PPC_GFXOPT -#define TARGET_LFIWAX TARGET_CMPB -#define TARGET_LFIWZX TARGET_POPCNTD -#define TARGET_FCFIDS TARGET_POPCNTD -#define TARGET_FCFIDU TARGET_POPCNTD -#define TARGET_FCFIDUS TARGET_POPCNTD -#define TARGET_FCTIDUZ TARGET_POPCNTD -#define TARGET_FCTIWUZ TARGET_POPCNTD +#define TARGET_LFIWAX (TARGET_CMPB && rs6000_cpu != PROCESSOR_PPCE5500) +#define TARGET_LFIWZX (TARGET_POPCNTD && rs6000_cpu != PROCESSOR_PPCE5500) +#define TARGET_FCFIDS TARGET_LFIWZX +#define TARGET_FCFIDU TARGET_LFIWZX +#define TARGET_FCFIDUS TARGET_LFIWZX +#define TARGET_FCTIDUZ TARGET_LFIWZX +#define TARGET_FCTIWUZ TARGET_LFIWZX /* For power systems, we want to enable Altivec and VSX builtins even if the user did not use -maltivec or -mvsx to allow the builtins to be used inside @@ -510,10 +511,12 @@ #define TARGET_FRE (TARGET_HARD_FLOAT && TARGET_FPRS \ && TARGET_DOUBLE_FLOAT \ - && (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode))) + && (TARGET_POPCNTB || VECTOR_UNIT_VSX_P (DFmode)) \ + && rs6000_cpu != PROCESSOR_PPCE5500) #define TARGET_FRSQRTES (TARGET_HARD_FLOAT && TARGET_POPCNTB \ - && TARGET_FPRS && TARGET_SINGLE_FLOAT) + && TARGET_FPRS && TARGET_SINGLE_FLOAT \ + && rs6000_cpu != PROCESSOR_PPCE5500) #define TARGET_FRSQRTE (TARGET_HARD_FLOAT && TARGET_FPRS \ && TARGET_DOUBLE_FLOAT \ Index: gcc-4_7-branch/gcc/config/rs6000/rs6000.md =================================================================== --- gcc-4_7-branch.orig/gcc/config/rs6000/rs6000.md 2012-04-10 10:17:24.275330056 -0700 +++ gcc-4_7-branch/gcc/config/rs6000/rs6000.md 2012-04-10 10:39:32.555394355 -0700 @@ -144,7 +144,7 @@ ;; Define an insn type attribute. This is used in function unit delay ;; computations. -(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel" +(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt" (const_string "integer")) ;; Define floating point instruction sub-types for use with Xfpu.md @@ -166,7 +166,7 @@ ;; Processor type -- this attribute must exactly match the processor_type ;; enumeration in rs6000.h. -(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,power4,power5,power6,power7,cell,ppca2,titan" +(define_attr "cpu" "rios1,rios2,rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,power4,power5,power6,power7,cell,ppca2,titan" (const (symbol_ref "rs6000_cpu_attr"))) @@ -194,6 +194,7 @@ (include "e300c2c3.md") (include "e500mc.md") (include "e500mc64.md") +(include "e5500.md") (include "power4.md") (include "power5.md") (include "power6.md") @@ -2329,13 +2330,17 @@ (unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_POPCNTB))] "TARGET_POPCNTB" - "popcntb %0,%1") + "popcntb %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_insn "popcntd2" [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") (popcount:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")))] "TARGET_POPCNTD" - "popcnt %0,%1") + "popcnt %0,%1" + [(set_attr "length" "4") + (set_attr "type" "popcnt")]) (define_expand "popcount2" [(set (match_operand:GPR 0 "gpc_reg_operand" "") @@ -5984,10 +5989,10 @@ && ((TARGET_PPC_GFXOPT && !HONOR_NANS (mode) && !HONOR_SIGNED_ZEROS (mode)) - || TARGET_CMPB + || TARGET_LFIWAX || VECTOR_UNIT_VSX_P (mode))" { - if (TARGET_CMPB || VECTOR_UNIT_VSX_P (mode)) + if (TARGET_LFIWAX || VECTOR_UNIT_VSX_P (mode)) { emit_insn (gen_copysign3_fcpsgn (operands[0], operands[1], operands[2])); @@ -6006,7 +6011,7 @@ (unspec:SFDF [(match_operand:SFDF 1 "gpc_reg_operand" "") (match_operand:SFDF 2 "gpc_reg_operand" "")] UNSPEC_COPYSIGN))] - "TARGET_CMPB && !VECTOR_UNIT_VSX_P (mode)" + "TARGET_LFIWAX && !VECTOR_UNIT_VSX_P (mode)" "fcpsgn %0,%2,%1" [(set_attr "type" "fp")]) Index: gcc-4_7-branch/gcc/config/rs6000/rs6000-cpus.def =================================================================== --- gcc-4_7-branch.orig/gcc/config/rs6000/rs6000-cpus.def 2012-04-10 10:17:24.295330058 -0700 +++ gcc-4_7-branch/gcc/config/rs6000/rs6000-cpus.def 2012-04-10 10:39:32.555394355 -0700 @@ -88,6 +88,9 @@ | MASK_ISEL) RS6000_CPU ("e500mc64", PROCESSOR_PPCE500MC64, POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_ISEL) +RS6000_CPU ("e5500", PROCESSOR_PPCE5500, + POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_ISEL + | MASK_CMPB | MASK_POPCNTB | MASK_POPCNTD) RS6000_CPU ("860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT) RS6000_CPU ("970", PROCESSOR_POWER4, POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64)