diff options
Diffstat (limited to 'gcc/config')
42 files changed, 915 insertions, 377 deletions
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c index 00a69c1a08d..15011aabcc7 100644 --- a/gcc/config/alpha/alpha.c +++ b/gcc/config/alpha/alpha.c @@ -1430,8 +1430,8 @@ alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total, case MINUS: if (float_mode_p) *total = cost_data->fp_add; - else if (GET_CODE (XEXP (x, 0)) == MULT - && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + else if (GET_CODE (XEXP (x, 0)) == ASHIFT + && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) { *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode, (enum rtx_code) outer_code, opno, speed) @@ -2929,8 +2929,8 @@ alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, add_op = GEN_INT (f); if (sext_add_operand (add_op, mode)) { - tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), - GEN_INT (diff)); + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (diff))); tmp = gen_rtx_PLUS (DImode, tmp, add_op); emit_insn (gen_rtx_SET (target, tmp)); } diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 4e213f812e9..97838a5083a 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -337,8 +337,8 @@ "! sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) % 4 == 0" [(set (match_dup 3) (match_dup 4)) - (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) - (match_dup 5)) + (set (match_dup 0) (sign_extend:DI (plus:SI (ashift:SI (match_dup 3) + (match_dup 5)) (match_dup 1))))] { HOST_WIDE_INT val = INTVAL (operands[2]) / 4; @@ -348,7 +348,7 @@ val /= 2, mult = 8; operands[4] = GEN_INT (val); - operands[5] = GEN_INT (mult); + operands[5] = GEN_INT (exact_log2 (mult)); }) (define_split @@ -519,38 +519,71 @@ (define_insn "*sadd<modesuffix>" [(set (match_operand:I48MODE 0 "register_operand" "=r,r") (plus:I48MODE - (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r,r") - (match_operand:I48MODE 2 "const48_operand" "I,I")) + (ashift:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r,r") + (match_operand:I48MODE 2 "const23_operand" "I,I")) (match_operand:I48MODE 3 "sext_add_operand" "rI,O")))] "" "@ - s%2add<modesuffix> %1,%3,%0 - s%2sub<modesuffix> %1,%n3,%0") + s%P2add<modesuffix> %1,%3,%0 + s%P2sub<modesuffix> %1,%n3,%0") + +(define_insn_and_split "*saddsi_1" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (subreg:SI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const23_operand" "I,I")) 0) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "" + "#" + "" + [(set (match_dup 0) + (plus:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "operands[1] = gen_lowpart (SImode, operands[1]);") (define_insn "*saddl_se" [(set (match_operand:DI 0 "register_operand" "=r,r") (sign_extend:DI - (plus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") - (match_operand:SI 2 "const48_operand" "I,I")) - (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + (plus:SI + (ashift:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const23_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] "" "@ - s%2addl %1,%3,%0 - s%2subl %1,%n3,%0") + s%P2addl %1,%3,%0 + s%P2subl %1,%n3,%0") + +(define_insn_and_split "*saddl_se_1" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI + (subreg:SI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const23_operand" "I,I")) 0) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "#" + "" + [(set (match_dup 0) + (sign_extend:DI + (plus:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 3))))] + "operands[1] = gen_lowpart (SImode, operands[1]);") (define_split [(set (match_operand:DI 0 "register_operand") (sign_extend:DI - (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + (plus:SI (ashift:SI (match_operator:SI 1 "comparison_operator" [(match_operand 2) (match_operand 3)]) - (match_operand:SI 4 "const48_operand")) + (match_operand:SI 4 "const23_operand")) (match_operand:SI 5 "sext_add_operand")))) (clobber (match_operand:DI 6 "reg_not_elim_operand"))] "" [(set (match_dup 6) (match_dup 7)) (set (match_dup 0) - (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (sign_extend:DI (plus:SI (ashift:SI (match_dup 8) (match_dup 4)) (match_dup 5))))] { operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, @@ -621,20 +654,53 @@ (define_insn "*ssub<modesuffix>" [(set (match_operand:I48MODE 0 "register_operand" "=r") (minus:I48MODE - (mult:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r") - (match_operand:I48MODE 2 "const48_operand" "I")) + (ashift:I48MODE (match_operand:I48MODE 1 "reg_not_elim_operand" "r") + (match_operand:I48MODE 2 "const23_operand" "I")) (match_operand:I48MODE 3 "reg_or_8bit_operand" "rI")))] "" - "s%2sub<modesuffix> %1,%3,%0") + "s%P2sub<modesuffix> %1,%3,%0") + +(define_insn_and_split "*ssubsi_1" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (subreg:SI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r") + (match_operand:DI 2 "const23_operand" "I")) 0) + (match_operand:SI 3 "reg_or_8bit_operand" "rI")))] + "" + "#" + "" + [(set (match_dup 0) + (minus:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 3)))] + "operands[1] = gen_lowpart (SImode, operands[1]);") (define_insn "*ssubl_se" [(set (match_operand:DI 0 "register_operand" "=r") (sign_extend:DI - (minus:SI (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") - (match_operand:SI 2 "const48_operand" "I")) - (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + (minus:SI + (ashift:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const23_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + "" + "s%P2subl %1,%3,%0") + +(define_insn_and_split "*ssubl_se_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI + (subreg:SI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r") + (match_operand:DI 2 "const23_operand" "I")) 0) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] "" - "s%2subl %1,%3,%0") + "#" + "" + [(set (match_dup 0) + (sign_extend:DI + (minus:SI (ashift:SI (match_dup 1) (match_dup 2)) + (match_dup 3))))] + "operands[1] = gen_lowpart (SImode, operands[1]);") (define_insn "subv<mode>3" [(set (match_operand:I48MODE 0 "register_operand" "=r") @@ -1200,7 +1266,7 @@ (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") (match_operand:DI 2 "const_int_operand" "P")) 0)))] - "INTVAL (operands[2]) >= 1 && INTVAL (operands[2]) <= 3" + "IN_RANGE (INTVAL (operands[2]), 1, 3)" { if (operands[2] == const1_rtx) return "addl %r1,%r1,%0"; @@ -3139,9 +3205,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (plus:DI (ashift:DI (match_dup 5) (match_dup 3)) (match_dup 4)))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) @@ -3164,9 +3231,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (plus:SI (ashift:SI (match_dup 6) (match_dup 3)) (match_dup 4)))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) @@ -3192,9 +3260,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (sign_extend:DI (plus:SI (ashift:SI (match_dup 6) (match_dup 3)) (match_dup 4))))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) @@ -3219,9 +3288,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (minus:DI (ashift:DI (match_dup 5) (match_dup 3)) (match_dup 4)))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) @@ -3244,9 +3314,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (minus:SI (ashift:SI (match_dup 6) (match_dup 3)) (match_dup 4)))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) @@ -3272,9 +3343,10 @@ [(set (match_dup 5) (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) (set (match_dup 0) - (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (sign_extend:DI (minus:SI (ashift:SI (match_dup 6) (match_dup 3)) (match_dup 4))))] { + operands[3] = GEN_INT (exact_log2 (INTVAL (operands [3]))); if (can_create_pseudo_p ()) operands[5] = gen_reg_rtx (DImode); else if (reg_overlap_mentioned_p (operands[5], operands[4])) diff --git a/gcc/config/alpha/predicates.md b/gcc/config/alpha/predicates.md index 102451ee80c..6fc359ec053 100644 --- a/gcc/config/alpha/predicates.md +++ b/gcc/config/alpha/predicates.md @@ -74,6 +74,11 @@ (and (match_code "const_int,const_wide_int,const_double,const_vector") (not (match_test "op == CONST0_RTX (mode)")))) +;; Return 1 if OP is the constant 2 or 3. +(define_predicate "const23_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) + ;; Return 1 if OP is the constant 4 or 8. (define_predicate "const48_operand" (and (match_code "const_int") diff --git a/gcc/config/arc/linux.h b/gcc/config/arc/linux.h index 83e5a1d61f2..6127a026fb3 100644 --- a/gcc/config/arc/linux.h +++ b/gcc/config/arc/linux.h @@ -29,7 +29,7 @@ along with GCC; see the file COPYING3. If not see } \ while (0) -#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-arc.so.2" #define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" /* Note that the default is to link against dynamic libraries, if they are diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 246c464be7b..2856b1afd5c 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -19331,7 +19331,12 @@ arm_get_vfp_saved_size (void) /* Generate a function exit sequence. If REALLY_RETURN is false, then do everything bar the final return instruction. If simple_return is true, - then do not output epilogue, because it has already been emitted in RTL. */ + then do not output epilogue, because it has already been emitted in RTL. + + Note: do not forget to update length attribute of corresponding insn pattern + when changing assembly output (eg. length attribute of + thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions + register clearing sequences). */ const char * output_return_instruction (rtx operand, bool really_return, bool reverse, bool simple_return) @@ -23829,7 +23834,12 @@ thumb_pop (FILE *f, unsigned long mask) /* Generate code to return from a thumb function. If 'reg_containing_return_addr' is -1, then the return address is - actually on the stack, at the stack pointer. */ + actually on the stack, at the stack pointer. + + Note: do not forget to update length attribute of corresponding insn pattern + when changing assembly output (eg. length attribute of epilogue_insns when + updating Armv8-M Baseline Security Extensions register clearing + sequences). */ static void thumb_exit (FILE *f, int reg_containing_return_addr) { diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f9e4356756a..ac1a49f8705 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1662,12 +1662,10 @@ enum arm_auto_incmodes /* These assume that REGNO is a hard or pseudo reg number. They give nonzero only if REGNO is a hard reg of the suitable class - or a pseudo reg currently allocated to a suitable hard reg. - Since they use reg_renumber, they are safe only once reg_renumber - has been allocated, which happens in reginfo.c during register - allocation. */ + or a pseudo reg currently allocated to a suitable hard reg. */ #define TEST_REGNO(R, TEST, VALUE) \ - ((R TEST VALUE) || ((unsigned) reg_renumber[R] TEST VALUE)) + ((R TEST VALUE) \ + || (reg_renumber && ((unsigned) reg_renumber[R] TEST VALUE))) /* Don't allow the pc to be used. */ #define ARM_REGNO_OK_FOR_BASE_P(REGNO) \ diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 2e7580f220e..35f8e9bbf24 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -1132,7 +1132,7 @@ ; we adapt the length accordingly. (set (attr "length") (if_then_else (match_test "TARGET_HARD_FLOAT") - (const_int 12) + (const_int 34) (const_int 8))) ; We do not support predicate execution of returns from cmse_nonsecure_entry ; functions because we need to clear the APSR. Since predicable has to be diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index d8f77e2ffe4..9f06c3da952 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -456,7 +456,10 @@ gcc_unreachable (); } } - [(set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no") + [(set_attr "conds" "*, *, unconditional, *, unconditional, unconditional,\ + unconditional, unconditional, unconditional,\ + unconditional") + (set_attr "predicable" "yes, yes, no, yes, no, no, no, no, no, no") (set_attr "predicable_short_it" "no, no, no, yes,\ no, no, no, no,\ no, no") diff --git a/gcc/config/epiphany/rtems.h b/gcc/config/epiphany/rtems.h new file mode 100644 index 00000000000..a9971f8c0b6 --- /dev/null +++ b/gcc/config/epiphany/rtems.h @@ -0,0 +1,28 @@ +/* Definitions for RTEMS based EPIPHANY systems. + Copyright (C) 2018 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define HAS_INIT_SECTION + +#undef TARGET_OS_CPP_BUILTINS +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + builtin_define ("__rtems__"); \ + builtin_define ("__USE_INIT_FINI__"); \ + builtin_assert ("system=rtems"); \ + } while (0) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cd9153efe57..2af4e9a2859 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2444,53 +2444,6 @@ struct GTY(()) stack_local_entry { struct stack_local_entry *next; }; -/* Structure describing stack frame layout. - Stack grows downward: - - [arguments] - <- ARG_POINTER - saved pc - - saved static chain if ix86_static_chain_on_stack - - saved frame pointer if frame_pointer_needed - <- HARD_FRAME_POINTER - [saved regs] - <- regs_save_offset - [padding0] - - [saved SSE regs] - <- sse_regs_save_offset - [padding1] | - | <- FRAME_POINTER - [va_arg registers] | - | - [frame] | - | - [padding2] | = to_allocate - <- STACK_POINTER - */ -struct ix86_frame -{ - int nsseregs; - int nregs; - int va_arg_size; - int red_zone_size; - int outgoing_arguments_size; - - /* The offsets relative to ARG_POINTER. */ - HOST_WIDE_INT frame_pointer_offset; - HOST_WIDE_INT hard_frame_pointer_offset; - HOST_WIDE_INT stack_pointer_offset; - HOST_WIDE_INT hfp_save_offset; - HOST_WIDE_INT reg_save_offset; - HOST_WIDE_INT sse_reg_save_offset; - - /* When save_regs_using_mov is set, emit prologue using - move instead of push instructions. */ - bool save_regs_using_mov; -}; - /* Which cpu are we scheduling for. */ enum attr_cpu ix86_schedule; @@ -2582,7 +2535,7 @@ static unsigned int ix86_function_arg_boundary (machine_mode, const_tree); static rtx ix86_static_chain (const_tree, bool); static int ix86_function_regparm (const_tree, const_tree); -static void ix86_compute_frame_layout (struct ix86_frame *); +static void ix86_compute_frame_layout (void); static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, rtx, rtx, int); static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT); @@ -2686,7 +2639,7 @@ rest_of_handle_insert_vzeroupper (void) int i; /* vzeroupper instructions are inserted immediately after reload to - account for possible spills from 256bit registers. The pass + account for possible spills from 256bit or 512bit registers. The pass reuses mode switching infrastructure by re-running mode insertion pass, so disable entities that have already been processed. */ for (i = 0; i < MAX_386_ENTITIES; i++) @@ -4185,7 +4138,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && !TARGET_AVX512F + return TARGET_AVX && TARGET_VZEROUPPER && flag_expensive_optimizations && !optimize_size; } @@ -6203,7 +6156,8 @@ ix86_option_override_internal (bool main_args_p, #endif } - if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) + && TARGET_EMIT_VZEROUPPER) opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & MASK_STV)) opts->x_target_flags |= MASK_STV; @@ -11889,8 +11843,6 @@ symbolic_reference_mentioned_p (rtx op) bool ix86_can_use_return_insn_p (void) { - struct ix86_frame frame; - /* Don't use `ret' instruction in interrupt handler. */ if (! reload_completed || frame_pointer_needed @@ -11902,7 +11854,8 @@ ix86_can_use_return_insn_p (void) if (crtl->args.pops_args && crtl->args.size >= 32768) return 0; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; return (frame.stack_pointer_offset == UNITS_PER_WORD && (frame.nregs + frame.nsseregs) == 0); } @@ -12388,8 +12341,8 @@ ix86_can_eliminate (const int from, const int to) HOST_WIDE_INT ix86_initial_elimination_offset (int from, int to) { - struct ix86_frame frame; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) return frame.hard_frame_pointer_offset; @@ -12428,8 +12381,9 @@ ix86_builtin_setjmp_frame_value (void) /* Fill structure ix86_frame about frame of currently computed function. */ static void -ix86_compute_frame_layout (struct ix86_frame *frame) +ix86_compute_frame_layout (void) { + struct ix86_frame *frame = &cfun->machine->frame; unsigned HOST_WIDE_INT stack_alignment_needed; HOST_WIDE_INT offset; unsigned HOST_WIDE_INT preferred_alignment; @@ -13713,7 +13667,6 @@ ix86_expand_prologue (void) { struct machine_function *m = cfun->machine; rtx insn, t; - struct ix86_frame frame; HOST_WIDE_INT allocate; bool int_registers_saved; bool sse_registers_saved; @@ -13736,7 +13689,8 @@ ix86_expand_prologue (void) m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; m->fs.sp_valid = true; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) { @@ -14399,12 +14353,12 @@ ix86_expand_epilogue (int style) { struct machine_function *m = cfun->machine; struct machine_frame_state frame_state_save = m->fs; - struct ix86_frame frame; bool restore_regs_via_mov; bool using_drap; ix86_finalize_stack_realign_flags (); - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; m->fs.sp_valid = (!frame_pointer_needed || (crtl->sp_is_unchanging @@ -14902,7 +14856,6 @@ static GTY(()) rtx split_stack_fn_large; void ix86_expand_split_stack_prologue (void) { - struct ix86_frame frame; HOST_WIDE_INT allocate; unsigned HOST_WIDE_INT args_size; rtx_code_label *label; @@ -14914,7 +14867,8 @@ ix86_expand_split_stack_prologue (void) gcc_assert (flag_split_stack && reload_completed); ix86_finalize_stack_realign_flags (); - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; /* This is the label we will branch to if we have enough stack @@ -19147,16 +19101,17 @@ ix86_dirflag_mode_needed (rtx_insn *insn) return X86_DIRFLAG_ANY; } -/* Check if a 256bit AVX register is referenced inside of EXP. */ +/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ static bool -ix86_check_avx256_register (const_rtx exp) +ix86_check_avx_upper_register (const_rtx exp) { if (SUBREG_P (exp)) exp = SUBREG_REG (exp); return (REG_P (exp) - && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))); + && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)) + || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp)))); } /* Return needed mode for entity in optimize_mode_switching pass. */ @@ -19169,7 +19124,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) rtx link; /* Needed mode is set to AVX_U128_CLEAN if there are - no 256bit modes used in function arguments. */ + no 256bit or 512bit modes used in function arguments. */ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) @@ -19178,7 +19133,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) { rtx arg = XEXP (XEXP (link, 0), 0); - if (ix86_check_avx256_register (arg)) + if (ix86_check_avx_upper_register (arg)) return AVX_U128_DIRTY; } } @@ -19186,13 +19141,13 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } - /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware - changes state only when a 256bit register is written to, but we need - to prevent the compiler from moving optimal insertion point above - eventual read from 256bit register. */ + /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. + Hardware changes state only when a 256bit register is written to, + but we need to prevent the compiler from moving optimal insertion + point above eventual read from 256bit or 512 bit register. */ subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (ix86_check_avx256_register (*iter)) + if (ix86_check_avx_upper_register (*iter)) return AVX_U128_DIRTY; return AVX_U128_ANY; @@ -19274,12 +19229,12 @@ ix86_mode_needed (int entity, rtx_insn *insn) return 0; } -/* Check if a 256bit AVX register is referenced in stores. */ +/* Check if a 256bit or 512bit AVX register is referenced in stores. */ static void -ix86_check_avx256_stores (rtx dest, const_rtx, void *data) +ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx256_register (dest)) + if (ix86_check_avx_upper_register (dest)) { bool *used = (bool *) data; *used = true; @@ -19298,18 +19253,18 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) return AVX_U128_CLEAN; /* We know that state is clean after CALL insn if there are no - 256bit registers used in the function return register. */ + 256bit or 512bit registers used in the function return register. */ if (CALL_P (insn)) { - bool avx_reg256_found = false; - note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found); + bool avx_upper_reg_found = false; + note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn - references AVX 256bit registers, the mode was already changed - to DIRTY from MODE_NEEDED. */ + references AVX 256bit or 512bit registers, the mode was already + changed to DIRTY from MODE_NEEDED. */ return mode; } @@ -19352,13 +19307,13 @@ ix86_avx_u128_mode_entry (void) tree arg; /* Entry mode is set to AVX_U128_DIRTY if there are - 256bit modes used in function arguments. */ + 256bit or 512bit modes used in function arguments. */ for (arg = DECL_ARGUMENTS (current_function_decl); arg; arg = TREE_CHAIN (arg)) { rtx incoming = DECL_INCOMING_RTL (arg); - if (incoming && ix86_check_avx256_register (incoming)) + if (incoming && ix86_check_avx_upper_register (incoming)) return AVX_U128_DIRTY; } @@ -19392,9 +19347,9 @@ ix86_avx_u128_mode_exit (void) { rtx reg = crtl->return_rtx; - /* Exit mode is set to AVX_U128_DIRTY if there are - 256bit modes used in the function return register. */ - if (reg && ix86_check_avx256_register (reg)) + /* Exit mode is set to AVX_U128_DIRTY if there are 256bit + or 512 bit modes used in the function return register. */ + if (reg && ix86_check_avx_upper_register (reg)) return AVX_U128_DIRTY; return AVX_U128_CLEAN; @@ -41878,7 +41833,8 @@ static void x86_print_call_or_nop (FILE *file, const char *target) { if (flag_nop_mcount) - fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */ + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ + fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); else fprintf (file, "1:\tcall\t%s\n", target); } @@ -45305,8 +45261,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) emit_move_insn (op0, ireg); } -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the - result in OPERAND0. */ +/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ void ix86_expand_rint (rtx operand0, rtx operand1) { @@ -45314,11 +45269,17 @@ ix86_expand_rint (rtx operand0, rtx operand1) xa = fabs (operand1); if (!isless (xa, 2**52)) return operand1; - xa = xa + 2**52 - 2**52; + two52 = 2**52; + if (flag_rounding_math) + { + two52 = copysign (two52, operand1); + xa = operand1; + } + xa = xa + two52 - two52; return copysign (xa, operand1); */ machine_mode mode = GET_MODE (operand0); - rtx res, xa, TWO52, mask; + rtx res, xa, TWO52, two52, mask; rtx_code_label *label; res = gen_reg_rtx (mode); @@ -45331,8 +45292,16 @@ ix86_expand_rint (rtx operand0, rtx operand1) TWO52 = ix86_gen_TWO52 (mode); label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + two52 = TWO52; + if (flag_rounding_math) + { + two52 = gen_reg_rtx (mode); + ix86_sse_copysign_to_positive (two52, TWO52, res, mask); + xa = res; + } + + xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); + xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); ix86_sse_copysign_to_positive (res, xa, res, mask); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9e5f4d857d9..f9b91286a01 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -508,6 +508,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] +#define TARGET_EMIT_VZEROUPPER \ + ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -1121,6 +1123,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \ || (MODE) == V4TImode) +#define VALID_AVX512F_REG_OR_XI_MODE(MODE) \ + (VALID_AVX512F_REG_MODE (MODE) || (MODE) == XImode) + #define VALID_AVX512VL_128_REG_MODE(MODE) \ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ @@ -2446,9 +2451,56 @@ enum avx_u128_state #define FASTCALL_PREFIX '@' +#ifndef USED_FOR_TARGET +/* Structure describing stack frame layout. + Stack grows downward: + + [arguments] + <- ARG_POINTER + saved pc + + saved static chain if ix86_static_chain_on_stack + + saved frame pointer if frame_pointer_needed + <- HARD_FRAME_POINTER + [saved regs] + <- regs_save_offset + [padding0] + + [saved SSE regs] + <- sse_regs_save_offset + [padding1] | + | <- FRAME_POINTER + [va_arg registers] | + | + [frame] | + | + [padding2] | = to_allocate + <- STACK_POINTER + */ +struct GTY(()) ix86_frame +{ + int nsseregs; + int nregs; + int va_arg_size; + int red_zone_size; + int outgoing_arguments_size; + + /* The offsets relative to ARG_POINTER. */ + HOST_WIDE_INT frame_pointer_offset; + HOST_WIDE_INT hard_frame_pointer_offset; + HOST_WIDE_INT stack_pointer_offset; + HOST_WIDE_INT hfp_save_offset; + HOST_WIDE_INT reg_save_offset; + HOST_WIDE_INT sse_reg_save_offset; + + /* When save_regs_using_mov is set, emit prologue using + move instead of push instructions. */ + bool save_regs_using_mov; +}; + /* Machine specific frame tracking during prologue/epilogue generation. */ -#ifndef USED_FOR_TARGET struct GTY(()) machine_frame_state { /* This pair tracks the currently active CFA as reg+offset. When reg @@ -2507,6 +2559,9 @@ struct GTY(()) machine_function { int varargs_fpr_size; int optimize_mode_switching[MAX_386_ENTITIES]; + /* Cached initial frame layout for the current function. */ + struct ix86_frame frame; + /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed for. */ int use_fast_prologue_epilogue_nregs; @@ -2589,6 +2644,7 @@ struct GTY(()) machine_function { #define ix86_current_function_calls_tls_descriptor \ (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG)) #define ix86_static_chain_on_stack (cfun->machine->static_chain_on_stack) +#define ix86_red_zone_size (cfun->machine->frame.red_zone_size) /* Control behavior of x86_file_start. */ #define X86_FILE_START_VERSION_DIRECTIVE false diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0281bb5f06c..dbe88f40c8f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9641,7 +9641,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) @@ -9655,7 +9655,8 @@ (ashift:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn "*bmi2_ashl<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -10141,7 +10142,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -10155,7 +10156,8 @@ (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn_and_split "*<shift_insn><mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=&r") @@ -10600,7 +10602,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a95f529d6a9..80cda39bacf 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -13852,7 +13852,7 @@ (match_operand:DI 1 "nonimmediate_operand" " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v") (match_operand:DI 2 "vector_move_operand" - "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))] + " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))] "TARGET_SSE" "@ pinsrq\t{$1, %2, %0|%0, %2, 1} diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c642f452e00..3b2701bb5f2 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -551,3 +551,7 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) if-converted sequence to one. */ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC) + +/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion + before a transfer of control flow out of the function. */ +DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) diff --git a/gcc/config/i386/xm-mingw32.h b/gcc/config/i386/xm-mingw32.h index 6eb3a626f0c..d4d63f58322 100644 --- a/gcc/config/i386/xm-mingw32.h +++ b/gcc/config/i386/xm-mingw32.h @@ -37,8 +37,3 @@ along with GCC; see the file COPYING3. If not see "long long" values. Instead, we use "I64". */ #define HOST_LONG_LONG_FORMAT "I64" #endif - -/* this is to prevent gcc-heap.c from assuming sizeof(long) == sizeof(intptr_t) */ -#ifdef __x86_64__ -# define HOST_BITS_PER_PTR 64 -#endif diff --git a/gcc/config/mips/frame-header-opt.c b/gcc/config/mips/frame-header-opt.c index 7e42dca3832..029f1759f06 100644 --- a/gcc/config/mips/frame-header-opt.c +++ b/gcc/config/mips/frame-header-opt.c @@ -98,8 +98,7 @@ void mips_register_frame_header_opt (void) { opt_pass *p = make_pass_ipa_frame_header_opt (g); - static struct register_pass_info f = - {p, "comdats", 1, PASS_POS_INSERT_AFTER }; + struct register_pass_info f = { p, "comdats", 1, PASS_POS_INSERT_AFTER }; register_pass (&f); } diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c index 3e65ba1db59..27dcdeab3d2 100644 --- a/gcc/config/pa/pa.c +++ b/gcc/config/pa/pa.c @@ -10511,9 +10511,16 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict) if (!TARGET_DISABLE_INDEXING && GET_CODE (index) == MULT - && MODE_OK_FOR_SCALED_INDEXING_P (mode) + /* Only accept base operands with the REG_POINTER flag prior to + reload on targets with non-equivalent space registers. */ + && (TARGET_NO_SPACE_REGS + || (base == XEXP (x, 1) + && (reload_completed + || (reload_in_progress && HARD_REGISTER_P (base)) + || REG_POINTER (base)))) && REG_P (XEXP (index, 0)) && GET_MODE (XEXP (index, 0)) == Pmode + && MODE_OK_FOR_SCALED_INDEXING_P (mode) && (strict ? STRICT_REG_OK_FOR_INDEX_P (XEXP (index, 0)) : REG_OK_FOR_INDEX_P (XEXP (index, 0))) && GET_CODE (XEXP (index, 1)) == CONST_INT diff --git a/gcc/config/riscv/linux.h b/gcc/config/riscv/linux.h index ecf424d1a2b..4b2f7b6e1fd 100644 --- a/gcc/config/riscv/linux.h +++ b/gcc/config/riscv/linux.h @@ -24,6 +24,17 @@ along with GCC; see the file COPYING3. If not see #define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-riscv" XLEN_SPEC "-" ABI_SPEC ".so.1" +#define MUSL_ABI_SUFFIX \ + "%{mabi=ilp32:-sf}" \ + "%{mabi=ilp32f:-sp}" \ + "%{mabi=ilp32d:}" \ + "%{mabi=lp64:-sf}" \ + "%{mabi=lp64f:-sp}" \ + "%{mabi=lp64d:}" \ + +#undef MUSL_DYNAMIC_LINKER +#define MUSL_DYNAMIC_LINKER "/lib/ld-musl-riscv" XLEN_SPEC MUSL_ABI_SUFFIX ".so.1" + /* Because RISC-V only has word-sized atomics, it requries libatomic where others do not. So link libatomic by default, as needed. */ #undef LIB_SPEC @@ -34,6 +45,8 @@ along with GCC; see the file COPYING3. If not see #define LIB_SPEC GNU_USER_TARGET_LIB_SPEC " -latomic " #endif +#define ICACHE_FLUSH_FUNC "__riscv_flush_icache" + #define LINK_SPEC "\ -melf" XLEN_SPEC "lriscv \ %{shared} \ diff --git a/gcc/config/riscv/pic.md b/gcc/config/riscv/pic.md index 6a29ead32d3..03b8f9bc669 100644 --- a/gcc/config/riscv/pic.md +++ b/gcc/config/riscv/pic.md @@ -22,13 +22,20 @@ ;; Simplify PIC loads to static variables. ;; These should go away once we figure out how to emit auipc discretely. -(define_insn "*local_pic_load<mode>" +(define_insn "*local_pic_load_s<mode>" [(set (match_operand:ANYI 0 "register_operand" "=r") - (mem:ANYI (match_operand 1 "absolute_symbolic_operand" "")))] + (sign_extend:ANYI (mem:ANYI (match_operand 1 "absolute_symbolic_operand" ""))))] "USE_LOAD_ADDRESS_MACRO (operands[1])" "<load>\t%0,%1" [(set (attr "length") (const_int 8))]) +(define_insn "*local_pic_load_u<mode>" + [(set (match_operand:ZERO_EXTEND_LOAD 0 "register_operand" "=r") + (zero_extend:ZERO_EXTEND_LOAD (mem:ZERO_EXTEND_LOAD (match_operand 1 "absolute_symbolic_operand" ""))))] + "USE_LOAD_ADDRESS_MACRO (operands[1])" + "<load>u\t%0,%1" + [(set (attr "length") (const_int 8))]) + (define_insn "*local_pic_load<mode>" [(set (match_operand:ANYF 0 "register_operand" "=f") (mem:ANYF (match_operand 1 "absolute_symbolic_operand" ""))) diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c index d5928c334de..5f53819eb36 100644 --- a/gcc/config/riscv/riscv.c +++ b/gcc/config/riscv/riscv.c @@ -1,5 +1,5 @@ /* Subroutines used for code generation for RISC-V. - Copyright (C) 2011-2017 Free Software Foundation, Inc. + Copyright (C) 2011-2018 Free Software Foundation, Inc. Contributed by Andrew Waterman (andrew@sifive.com). Based on MIPS target for GNU compiler. @@ -177,9 +177,6 @@ struct GTY(()) machine_function { This area is allocated by the callee at the very top of the frame. */ int varargs_size; - /* Memoized return value of leaf_function_p. <0 if false, >0 if true. */ - int is_leaf; - /* The current frame information, calculated by riscv_compute_frame_info. */ struct riscv_frame_info frame; }; @@ -255,6 +252,7 @@ struct riscv_tune_info unsigned short issue_rate; unsigned short branch_cost; unsigned short memory_cost; + bool slow_unaligned_access; }; /* Information about one CPU we know about. */ @@ -268,6 +266,9 @@ struct riscv_cpu_info { /* Global variables for machine-dependent things. */ +/* Whether unaligned accesses execute very slowly. */ +bool riscv_slow_unaligned_access; + /* Which tuning parameters to use. */ static const struct riscv_tune_info *tune_info; @@ -301,7 +302,8 @@ static const struct riscv_tune_info rocket_tune_info = { {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */ 1, /* issue_rate */ 3, /* branch_cost */ - 5 /* memory_cost */ + 5, /* memory_cost */ + true, /* slow_unaligned_access */ }; /* Costs to use when optimizing for size. */ @@ -313,12 +315,14 @@ static const struct riscv_tune_info optimize_size_tune_info = { {COSTS_N_INSNS (1), COSTS_N_INSNS (1)}, /* int_div */ 1, /* issue_rate */ 1, /* branch_cost */ - 2 /* memory_cost */ + 2, /* memory_cost */ + false, /* slow_unaligned_access */ }; /* A table describing all the processors GCC knows about. */ static const struct riscv_cpu_info riscv_cpu_info_table[] = { { "rocket", &rocket_tune_info }, + { "size", &optimize_size_tune_info }, }; /* Return the riscv_cpu_info entry for the given name string. */ @@ -726,7 +730,8 @@ riscv_valid_lo_sum_p (enum riscv_symbol_type sym_type, enum machine_mode mode) /* We may need to split multiword moves, so make sure that each word can be accessed without inducing a carry. */ if (GET_MODE_SIZE (mode) > UNITS_PER_WORD - && GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode)) + && (!TARGET_STRICT_ALIGN + || GET_MODE_BITSIZE (mode) > GET_MODE_ALIGNMENT (mode))) return false; return true; @@ -1377,6 +1382,22 @@ riscv_legitimize_move (enum machine_mode mode, rtx dest, rtx src) return true; } + /* RISC-V GCC may generate non-legitimate address due to we provide some + pattern for optimize access PIC local symbol and it's make GCC generate + unrecognizable instruction during optmizing. */ + + if (MEM_P (dest) && !riscv_legitimate_address_p (mode, XEXP (dest, 0), + reload_completed)) + { + XEXP (dest, 0) = riscv_force_address (XEXP (dest, 0), mode); + } + + if (MEM_P (src) && !riscv_legitimate_address_p (mode, XEXP (src, 0), + reload_completed)) + { + XEXP (src, 0) = riscv_force_address (XEXP (src, 0), mode); + } + return false; } @@ -3773,6 +3794,16 @@ riscv_option_override (void) RISCV_TUNE_STRING_DEFAULT); tune_info = optimize_size ? &optimize_size_tune_info : cpu->tune_info; + /* Use -mtune's setting for slow_unaligned_access, even when optimizing + for size. For architectures that trap and emulate unaligned accesses, + the performance cost is too great, even for -Os. Similarly, if + -m[no-]strict-align is left unspecified, heed -mtune's advice. */ + riscv_slow_unaligned_access = (cpu->tune_info->slow_unaligned_access + || TARGET_STRICT_ALIGN); + if ((target_flags_explicit & MASK_STRICT_ALIGN) == 0 + && cpu->tune_info->slow_unaligned_access) + target_flags |= MASK_STRICT_ALIGN; + /* If the user hasn't specified a branch cost, use the processor's default. */ if (riscv_branch_cost == 0) @@ -3960,26 +3991,15 @@ riscv_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) emit_insn (gen_clear_cache (addr, end_addr)); } -/* Return leaf_function_p () and memoize the result. */ - -static bool -riscv_leaf_function_p (void) -{ - if (cfun->machine->is_leaf == 0) - cfun->machine->is_leaf = leaf_function_p () ? 1 : -1; - - return cfun->machine->is_leaf > 0; -} - /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */ static bool riscv_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, tree exp ATTRIBUTE_UNUSED) { - /* When optimzing for size, don't use sibcalls in non-leaf routines */ + /* Don't use sibcalls when use save-restore routine. */ if (TARGET_SAVE_RESTORE) - return riscv_leaf_function_p (); + return false; return true; } diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 8d4c75e6770..c5d134cbe57 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -126,10 +126,11 @@ along with GCC; see the file COPYING3. If not see /* There is no point aligning anything to a rounder boundary than this. */ #define BIGGEST_ALIGNMENT 128 -/* The user-level ISA permits misaligned accesses, but they may execute - extremely slowly and non-atomically. Some privileged architectures - do not permit them at all. It is best to enforce strict alignment. */ -#define STRICT_ALIGNMENT 1 +/* The user-level ISA permits unaligned accesses, but they are not required + of the privileged architecture. */ +#define STRICT_ALIGNMENT TARGET_STRICT_ALIGN + +#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) riscv_slow_unaligned_access /* Define this if you wish to imitate the way many other C compilers handle alignment of bitfields and the structures that contain @@ -864,6 +865,7 @@ while (0) #ifndef USED_FOR_TARGET extern const enum reg_class riscv_regno_to_class[]; extern bool riscv_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER]; +extern bool riscv_slow_unaligned_access; #endif #define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL) \ diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index 4cbb2431335..5f216d3255b 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -259,6 +259,9 @@ ;; Iterator for QImode extension patterns. (define_mode_iterator SUPERQI [HI SI (DI "TARGET_64BIT")]) +;; Iterator for extending loads. +(define_mode_iterator ZERO_EXTEND_LOAD [QI HI (SI "TARGET_64BIT")]) + ;; Iterator for hardware integer modes narrower than XLEN. (define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")]) @@ -1426,7 +1429,13 @@ (match_operand 1 "pmode_register_operand")] "" { +#ifdef ICACHE_FLUSH_FUNC + emit_library_call (gen_rtx_SYMBOL_REF (Pmode, ICACHE_FLUSH_FUNC), + LCT_NORMAL, VOIDmode, 3, operands[0], Pmode, + operands[1], Pmode, const0_rtx, Pmode); +#else emit_insn (gen_fence_i ()); +#endif DONE; }) diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt index 0466bb29d14..cfd0335d082 100644 --- a/gcc/config/riscv/riscv.opt +++ b/gcc/config/riscv/riscv.opt @@ -84,6 +84,10 @@ mcmodel= Target Report RejectNegative Joined Enum(code_model) Var(riscv_cmodel) Init(TARGET_DEFAULT_CMODEL) Specify the code model. +mstrict-align +Target Report Mask(STRICT_ALIGN) Save +Do not generate unaligned memory accesses. + Enum Name(code_model) Type(enum riscv_code_model) Known code models (for use with the -mcmodel= option): diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 2ad21619df9..06c7582c5a5 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2117,7 +2117,7 @@ UNSPEC_VPERMR))] "TARGET_P9_VECTOR" "@ - vpermr %0,%2,%1,%3 + vpermr %0,%1,%2,%3 xxpermr %x0,%x1,%x3" [(set_attr "type" "vecperm") (set_attr "length" "4")]) diff --git a/gcc/config/rs6000/power6.md b/gcc/config/rs6000/power6.md index eb53246a40e..0d81cdebda5 100644 --- a/gcc/config/rs6000/power6.md +++ b/gcc/config/rs6000/power6.md @@ -108,7 +108,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-load-ext" 4 ; fx (and (eq_attr "type" "load") @@ -128,7 +128,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-load-update" 2 ; fx (and (eq_attr "type" "load") @@ -276,7 +276,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-cntlz" 2 (and (eq_attr "type" "cntlz") @@ -289,7 +289,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-var-rotate" 4 (and (eq_attr "type" "shift") @@ -355,7 +355,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-delayed-compare" 2 ; N/A (and (eq_attr "type" "shift") @@ -420,7 +420,7 @@ power6-store-update-indexed,\ power6-fpstore,\ power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-idiv" 44 (and (eq_attr "type" "div") @@ -436,7 +436,7 @@ ; power6-store-update-indexed,\ ; power6-fpstore,\ ; power6-fpstore-update" -; "store_data_bypass_p") +; "rs6000_store_data_bypass_p") (define_insn_reservation "power6-ldiv" 56 (and (eq_attr "type" "div") @@ -452,7 +452,7 @@ ; power6-store-update-indexed,\ ; power6-fpstore,\ ; power6-fpstore-update" -; "store_data_bypass_p") +; "rs6000_store_data_bypass_p") (define_insn_reservation "power6-mtjmpr" 2 (and (eq_attr "type" "mtjmpr,mfjmpr") @@ -510,7 +510,7 @@ (define_bypass 1 "power6-fp" "power6-fpstore,power6-fpstore-update" - "store_data_bypass_p") + "rs6000_store_data_bypass_p") (define_insn_reservation "power6-fpcompare" 8 (and (eq_attr "type" "fpcompare") diff --git a/gcc/config/rs6000/ppc-asm.h b/gcc/config/rs6000/ppc-asm.h index 33925e558b0..080be3f7ba0 100644 --- a/gcc/config/rs6000/ppc-asm.h +++ b/gcc/config/rs6000/ppc-asm.h @@ -120,7 +120,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define f47 47 #define f48 48 #define f49 49 -#define f50 30 +#define f50 50 #define f51 51 #define f52 52 #define f53 53 @@ -222,7 +222,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define vs47 47 #define vs48 48 #define vs49 49 -#define vs50 30 +#define vs50 50 #define vs51 51 #define vs52 52 #define vs53 53 diff --git a/gcc/config/rs6000/ppc-auxv.h b/gcc/config/rs6000/ppc-auxv.h index dcee28a8152..0bd3bc4332f 100644 --- a/gcc/config/rs6000/ppc-auxv.h +++ b/gcc/config/rs6000/ppc-auxv.h @@ -91,6 +91,7 @@ #define PPC_FEATURE2_HAS_IEEE128 0x00400000 #define PPC_FEATURE2_DARN 0x00200000 #define PPC_FEATURE2_SCV 0x00100000 +#define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000 /* Thread Control Block (TCB) offsets of the AT_PLATFORM, AT_HWCAP and diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 74ad733d1b9..0692510b98e 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -226,6 +226,7 @@ extern void rs6000_aix_asm_output_dwarf_table_ref (char *); extern void get_ppc476_thunk_name (char name[32]); extern bool rs6000_overloaded_builtin_p (enum rs6000_builtins); extern const char *rs6000_overloaded_builtin_name (enum rs6000_builtins); +extern int rs6000_store_data_bypass_p (rtx_insn *, rtx_insn *); extern HOST_WIDE_INT rs6000_builtin_mask_calculate (void); extern void rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label); @@ -239,6 +240,7 @@ extern void rs6000_cpu_cpp_builtins (struct cpp_reader *); #ifdef TREE_CODE extern bool rs6000_pragma_target_parse (tree, tree); #endif +extern void rs6000_activate_target_options (tree new_tree); extern void rs6000_target_modify_macros (bool, HOST_WIDE_INT, HOST_WIDE_INT); extern void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d5213627f8f..323b218d229 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -375,6 +375,7 @@ static const struct { "ebb", PPC_FEATURE2_HAS_EBB, 1 }, { "htm", PPC_FEATURE2_HAS_HTM, 1 }, { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 }, + { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 }, { "isel", PPC_FEATURE2_HAS_ISEL, 1 }, { "tar", PPC_FEATURE2_HAS_TAR, 1 }, { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, @@ -510,6 +511,91 @@ mode_supports_pre_modify_p (machine_mode mode) != 0); } +/* Given that there exists at least one variable that is set (produced) + by OUT_INSN and read (consumed) by IN_INSN, return true iff + IN_INSN represents one or more memory store operations and none of + the variables set by OUT_INSN is used by IN_INSN as the address of a + store operation. If either IN_INSN or OUT_INSN does not represent + a "single" RTL SET expression (as loosely defined by the + implementation of the single_set function) or a PARALLEL with only + SETs, CLOBBERs, and USEs inside, this function returns false. + + This rs6000-specific version of store_data_bypass_p checks for + certain conditions that result in assertion failures (and internal + compiler errors) in the generic store_data_bypass_p function and + returns false rather than calling store_data_bypass_p if one of the + problematic conditions is detected. */ + +int +rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn) +{ + rtx out_set, in_set; + rtx out_pat, in_pat; + rtx out_exp, in_exp; + int i, j; + + in_set = single_set (in_insn); + if (in_set) + { + if (MEM_P (SET_DEST (in_set))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) == PARALLEL) + { + for (i = 0; i < XVECLEN (out_pat, 0); i++) + { + out_exp = XVECEXP (out_pat, 0, i); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + else + { + in_pat = PATTERN (in_insn); + if (GET_CODE (in_pat) != PARALLEL) + return false; + + for (i = 0; i < XVECLEN (in_pat, 0); i++) + { + in_exp = XVECEXP (in_pat, 0, i); + if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE)) + continue; + else if (GET_CODE (in_exp) != SET) + return false; + + if (MEM_P (SET_DEST (in_exp))) + { + out_set = single_set (out_insn); + if (!out_set) + { + out_pat = PATTERN (out_insn); + if (GET_CODE (out_pat) != PARALLEL) + return false; + for (j = 0; j < XVECLEN (out_pat, 0); j++) + { + out_exp = XVECEXP (out_pat, 0, j); + if ((GET_CODE (out_exp) == CLOBBER) + || (GET_CODE (out_exp) == USE)) + continue; + else if (GET_CODE (out_exp) != SET) + return false; + } + } + } + } + } + return store_data_bypass_p (out_insn, in_insn); +} + /* Return true if we have D-form addressing in altivec registers. */ static inline bool mode_supports_vmx_dform (machine_mode mode) @@ -3899,14 +3985,10 @@ static bool rs6000_option_override_internal (bool global_init_p) { bool ret = true; - bool have_cpu = false; - - /* The default cpu requested at configure time, if any. */ - const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT; HOST_WIDE_INT set_masks; HOST_WIDE_INT ignore_masks; - int cpu_index; + int cpu_index = -1; int tune_index; struct cl_target_option *main_target_opt = ((global_init_p || target_option_default_node == NULL) @@ -3984,93 +4066,51 @@ rs6000_option_override_internal (bool global_init_p) with -mtune on the command line. Process a '--with-cpu' configuration request as an implicit --cpu. */ if (rs6000_cpu_index >= 0) - { - cpu_index = rs6000_cpu_index; - have_cpu = true; - } + cpu_index = rs6000_cpu_index; else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) - { - rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index; - have_cpu = true; - } - else if (implicit_cpu) - { - rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu); - have_cpu = true; - } - else - { - /* PowerPC 64-bit LE requires at least ISA 2.07. */ - const char *default_cpu = ((!TARGET_POWERPC64) - ? "powerpc" - : ((BYTES_BIG_ENDIAN) - ? "powerpc64" - : "powerpc64le")); + cpu_index = main_target_opt->x_rs6000_cpu_index; + else if (OPTION_TARGET_CPU_DEFAULT) + cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT); - rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); - have_cpu = false; - } - - gcc_assert (cpu_index >= 0); - - if (have_cpu) + if (cpu_index >= 0) { -#ifndef HAVE_AS_POWER9 - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER9) + const char *unavailable_cpu = NULL; + switch (processor_target_table[cpu_index].processor) { - have_cpu = false; - warning (0, "will not generate power9 instructions because " - "assembler lacks power9 support"); - } +#ifndef HAVE_AS_POWER9 + case PROCESSOR_POWER9: + unavailable_cpu = "power9"; + break; #endif #ifndef HAVE_AS_POWER8 - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER8) - { - have_cpu = false; - warning (0, "will not generate power8 instructions because " - "assembler lacks power8 support"); - } + case PROCESSOR_POWER8: + unavailable_cpu = "power8"; + break; #endif #ifndef HAVE_AS_POPCNTD - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER7) - { - have_cpu = false; - warning (0, "will not generate power7 instructions because " - "assembler lacks power7 support"); - } + case PROCESSOR_POWER7: + unavailable_cpu = "power7"; + break; #endif #ifndef HAVE_AS_DFP - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER6) - { - have_cpu = false; - warning (0, "will not generate power6 instructions because " - "assembler lacks power6 support"); - } + case PROCESSOR_POWER6: + unavailable_cpu = "power6"; + break; #endif #ifndef HAVE_AS_POPCNTB - if (processor_target_table[rs6000_cpu_index].processor - == PROCESSOR_POWER5) - { - have_cpu = false; - warning (0, "will not generate power5 instructions because " - "assembler lacks power5 support"); - } + case PROCESSOR_POWER5: + unavailable_cpu = "power5"; + break; #endif - - if (!have_cpu) + default: + break; + } + if (unavailable_cpu) { - /* PowerPC 64-bit LE requires at least ISA 2.07. */ - const char *default_cpu = (!TARGET_POWERPC64 - ? "powerpc" - : (BYTES_BIG_ENDIAN - ? "powerpc64" - : "powerpc64le")); - - rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); + cpu_index = -1; + warning (0, "will not generate %qs instructions because " + "assembler lacks %qs support", unavailable_cpu, + unavailable_cpu); } } @@ -4079,8 +4119,9 @@ rs6000_option_override_internal (bool global_init_p) with those from the cpu, except for options that were explicitly set. If we don't have a cpu, do not override the target bits set in TARGET_DEFAULT. */ - if (have_cpu) + if (cpu_index >= 0) { + rs6000_cpu_index = cpu_index; rs6000_isa_flags &= ~set_masks; rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable & set_masks); @@ -4094,14 +4135,26 @@ rs6000_option_override_internal (bool global_init_p) If there is a TARGET_DEFAULT, use that. Otherwise fall back to using -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ - HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT - : processor_target_table[cpu_index].target_enable); + HOST_WIDE_INT flags; + if (TARGET_DEFAULT) + flags = TARGET_DEFAULT; + else + { + /* PowerPC 64-bit LE requires at least ISA 2.07. */ + const char *default_cpu = (!TARGET_POWERPC64 + ? "powerpc" + : (BYTES_BIG_ENDIAN + ? "powerpc64" + : "powerpc64le")); + int default_cpu_index = rs6000_cpu_name_lookup (default_cpu); + flags = processor_target_table[default_cpu_index].target_enable; + } rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); } if (rs6000_tune_index >= 0) tune_index = rs6000_tune_index; - else if (have_cpu) + else if (cpu_index >= 0) rs6000_tune_index = tune_index = cpu_index; else { @@ -4113,7 +4166,7 @@ rs6000_option_override_internal (bool global_init_p) for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) if (processor_target_table[i].processor == tune_proc) { - rs6000_tune_index = tune_index = i; + tune_index = i; break; } } @@ -4286,7 +4339,7 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); else if (TARGET_P9_MINMAX) { - if (have_cpu) + if (cpu_index >= 0) { if (cpu_index == PROCESSOR_POWER9) { @@ -5036,7 +5089,7 @@ rs6000_option_override_internal (bool global_init_p) default: - if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) + if (cpu_index >= 0 && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) rs6000_isa_flags &= ~OPTION_MASK_ISEL; break; @@ -7543,7 +7596,7 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) { if (TARGET_P9_VECTOR) x = gen_rtx_UNSPEC (mode, - gen_rtvec (3, target, reg, + gen_rtvec (3, reg, target, force_reg (V16QImode, x)), UNSPEC_VPERMR); else @@ -33091,14 +33144,14 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case TYPE_LOAD: case TYPE_CNTLZ: { - if (! store_data_bypass_p (dep_insn, insn)) + if (! rs6000_store_data_bypass_p (dep_insn, insn)) return get_attr_sign_extend (dep_insn) == SIGN_EXTEND_YES ? 6 : 4; break; } case TYPE_SHIFT: { - if (! store_data_bypass_p (dep_insn, insn)) + if (! rs6000_store_data_bypass_p (dep_insn, insn)) return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? 6 : 3; break; @@ -33109,7 +33162,7 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case TYPE_EXTS: case TYPE_INSERT: { - if (! store_data_bypass_p (dep_insn, insn)) + if (! rs6000_store_data_bypass_p (dep_insn, insn)) return 3; break; } @@ -33118,19 +33171,19 @@ rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case TYPE_FPSTORE: { if (get_attr_update (dep_insn) == UPDATE_YES - && ! store_data_bypass_p (dep_insn, insn)) + && ! rs6000_store_data_bypass_p (dep_insn, insn)) return 3; break; } case TYPE_MUL: { - if (! store_data_bypass_p (dep_insn, insn)) + if (! rs6000_store_data_bypass_p (dep_insn, insn)) return 17; break; } case TYPE_DIV: { - if (! store_data_bypass_p (dep_insn, insn)) + if (! rs6000_store_data_bypass_p (dep_insn, insn)) return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; break; } @@ -37307,14 +37360,16 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = COSTS_N_INSNS (1); return true; } + /* FALLTHRU */ + + case GT: + case LT: + case UNORDERED: if (outer_code == SET) { if (XEXP (x, 1) == const0_rtx) { - if (TARGET_ISEL && !TARGET_MFCRF) - *total = COSTS_N_INSNS (8); - else - *total = COSTS_N_INSNS (2); + *total = COSTS_N_INSNS (2); return true; } else @@ -37323,19 +37378,6 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code, return false; } } - /* FALLTHRU */ - - case GT: - case LT: - case UNORDERED: - if (outer_code == SET && (XEXP (x, 1) == const0_rtx)) - { - if (TARGET_ISEL && !TARGET_MFCRF) - *total = COSTS_N_INSNS (8); - else - *total = COSTS_N_INSNS (2); - return true; - } /* CC COMPARE. */ if (outer_code == COMPARE) { @@ -37994,7 +38036,7 @@ altivec_expand_vec_perm_le (rtx operands[4]) if (TARGET_P9_VECTOR) { - unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel), + unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel), UNSPEC_VPERMR); } else @@ -39400,9 +39442,9 @@ rs6000_valid_attribute_p (tree fndecl, { struct cl_target_option cur_target; bool ret; - tree old_optimize = build_optimization_node (&global_options); + tree old_optimize; tree new_target, new_optimize; - tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); + tree func_optimize; gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); @@ -39529,6 +39571,7 @@ rs6000_pragma_target_parse (tree args, tree pop_target) } target_option_current_node = cur_tree; + rs6000_activate_target_options (target_option_current_node); /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly change the macros that are defined. */ @@ -39569,7 +39612,7 @@ static GTY(()) tree rs6000_previous_fndecl; /* Restore target's globals from NEW_TREE and invalidate the rs6000_previous_fndecl cache. */ -static void +void rs6000_activate_target_options (tree new_tree) { cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree)); @@ -39949,7 +39992,8 @@ rs6000_address_for_fpconvert (rtx x) gcc_assert (MEM_P (x)); addr = XEXP (x, 0); - if (! legitimate_indirect_address_p (addr, strict_p) + if (can_create_pseudo_p () + && ! legitimate_indirect_address_p (addr, strict_p) && ! legitimate_indexed_address_p (addr, strict_p)) { if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 3323976a35d..8c3ccda8d91 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -13304,14 +13304,12 @@ ; Some 32-bit ABIs do not have a red zone, so the stack deallocation has to ; stay behind all restores from the stack, it cannot be reordered to before -; one. See PR77687. This insn is an add or mr, and a stack_tie on the -; operands of that. +; one. See PR77687. This insn is an add or mr, and a memory clobber. (define_insn "stack_restore_tie" [(set (match_operand:SI 0 "gpc_reg_operand" "=r,r") (plus:SI (match_operand:SI 1 "gpc_reg_operand" "r,r") (match_operand:SI 2 "reg_or_cint_operand" "O,rI"))) - (set (mem:BLK (match_dup 0)) (const_int 0)) - (set (mem:BLK (match_dup 1)) (const_int 0))] + (set (mem:BLK (scratch)) (const_int 0))] "TARGET_32BIT" "@ mr %0,%1 @@ -14644,6 +14642,9 @@ { if (GET_CODE (operands[2]) == SCRATCH) operands[2] = gen_reg_rtx (DImode); + + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); }) (define_insn_and_split "float<QHI:mode><IEEE128:mode>2" @@ -14707,6 +14708,9 @@ { if (GET_CODE (operands[2]) == SCRATCH) operands[2] = gen_reg_rtx (DImode); + + if (MEM_P (operands[1])) + operands[1] = rs6000_address_for_fpconvert (operands[1]); }) (define_insn_and_split "floatuns<QHI:mode><IEEE128:mode>2" diff --git a/gcc/config/rs6000/t-rtems b/gcc/config/rs6000/t-rtems index 1633b39367f..66c20aadea5 100644 --- a/gcc/config/rs6000/t-rtems +++ b/gcc/config/rs6000/t-rtems @@ -78,4 +78,3 @@ MULTILIB_REQUIRED += mcpu=860 MULTILIB_REQUIRED += mcpu=e6500/m32 MULTILIB_REQUIRED += mcpu=e6500/m32/msoft-float/mno-altivec MULTILIB_REQUIRED += mcpu=e6500/m64 -MULTILIB_REQUIRED += mcpu=e6500/m64/msoft-float/mno-altivec diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c index 1e74f380bdb..414c64b2b5b 100644 --- a/gcc/config/rx/rx.c +++ b/gcc/config/rx/rx.c @@ -284,6 +284,9 @@ rx_is_restricted_memory_address (rtx mem, machine_mode mode) /* Simple memory addresses are OK. */ return true; + case SUBREG: + return RX_REG_P (SUBREG_REG (mem)); + case PRE_DEC: case POST_INC: return false; diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md index eae3e3b7d49..ef58f129a1d 100644 --- a/gcc/config/rx/rx.md +++ b/gcc/config/rx/rx.md @@ -2167,6 +2167,7 @@ [(plus "add") (minus "sub") (ior "ior") (xor "xor") (and "and")]) (define_mode_iterator QIHI [QI HI]) +(define_mode_attr BW [(QI "B") (HI "W")]) (define_insn "sync_lock_test_and_setsi" [(set (match_operand:SI 0 "register_operand" "=r,r") @@ -2208,7 +2209,7 @@ (set (match_dup 1) (match_operand:QIHI 2 "register_operand" "0"))] "" - "xchg\t%1, %0" + "xchg\t%1.<BW>, %0" [(set_attr "length" "6") (set_attr "timings" "22")] ) diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index e19e977087d..4585c4855c7 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -2277,8 +2277,8 @@ "" { rtx last; + rtx func_ptr = gen_reg_rtx (Pmode); - operands[3] = gen_reg_rtx (Pmode); /* Emit the move of the address to a pseudo outside of the libcall. */ if (TARGET_DIVIDE_CALL_TABLE) { @@ -2298,16 +2298,16 @@ emit_move_insn (operands[0], operands[2]); DONE; } - function_symbol (operands[3], "__udivsi3_i4i", SFUNC_GOT); - last = gen_udivsi3_i4_int (operands[0], operands[3]); + function_symbol (func_ptr, "__udivsi3_i4i", SFUNC_GOT); + last = gen_udivsi3_i4_int (operands[0], func_ptr); } else if (TARGET_DIVIDE_CALL_FP) { - rtx lab = function_symbol (operands[3], "__udivsi3_i4", SFUNC_STATIC).lab; + rtx lab = function_symbol (func_ptr, "__udivsi3_i4", SFUNC_STATIC).lab; if (TARGET_FPU_SINGLE) - last = gen_udivsi3_i4_single (operands[0], operands[3], lab); + last = gen_udivsi3_i4_single (operands[0], func_ptr, lab); else - last = gen_udivsi3_i4 (operands[0], operands[3], lab); + last = gen_udivsi3_i4 (operands[0], func_ptr, lab); } else if (TARGET_SH2A) { @@ -2318,8 +2318,8 @@ } else { - rtx lab = function_symbol (operands[3], "__udivsi3", SFUNC_STATIC).lab; - last = gen_udivsi3_i1 (operands[0], operands[3], lab); + rtx lab = function_symbol (func_ptr, "__udivsi3", SFUNC_STATIC).lab; + last = gen_udivsi3_i1 (operands[0], func_ptr, lab); } emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); @@ -2405,22 +2405,22 @@ "" { rtx last; + rtx func_ptr = gen_reg_rtx (Pmode); - operands[3] = gen_reg_rtx (Pmode); /* Emit the move of the address to a pseudo outside of the libcall. */ if (TARGET_DIVIDE_CALL_TABLE) { - function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT); - last = gen_divsi3_i4_int (operands[0], operands[3]); + function_symbol (func_ptr, sh_divsi3_libfunc, SFUNC_GOT); + last = gen_divsi3_i4_int (operands[0], func_ptr); } else if (TARGET_DIVIDE_CALL_FP) { - rtx lab = function_symbol (operands[3], sh_divsi3_libfunc, + rtx lab = function_symbol (func_ptr, sh_divsi3_libfunc, SFUNC_STATIC).lab; if (TARGET_FPU_SINGLE) - last = gen_divsi3_i4_single (operands[0], operands[3], lab); + last = gen_divsi3_i4_single (operands[0], func_ptr, lab); else - last = gen_divsi3_i4 (operands[0], operands[3], lab); + last = gen_divsi3_i4 (operands[0], func_ptr, lab); } else if (TARGET_SH2A) { @@ -2431,8 +2431,8 @@ } else { - function_symbol (operands[3], sh_divsi3_libfunc, SFUNC_GOT); - last = gen_divsi3_i1 (operands[0], operands[3]); + function_symbol (func_ptr, sh_divsi3_libfunc, SFUNC_GOT); + last = gen_divsi3_i1 (operands[0], func_ptr); } emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); emit_move_insn (gen_rtx_REG (SImode, 5), operands[2]); @@ -6519,6 +6519,7 @@ [(call (mem (match_operand:SI 0 "symbol_ref_operand" "")) (match_operand 1 "" "")) (use (reg:SI FPSCR_MODES_REG)) + (use (match_scratch 2)) (clobber (reg:SI PR_REG))] "TARGET_SH2A && sh2a_is_function_vector_call (operands[0])" { @@ -6629,6 +6630,7 @@ (call (mem:SI (match_operand:SI 1 "symbol_ref_operand" "")) (match_operand 2 "" ""))) (use (reg:SI FPSCR_MODES_REG)) + (use (match_scratch 3)) (clobber (reg:SI PR_REG))] "TARGET_SH2A && sh2a_is_function_vector_call (operands[1])" { @@ -7044,13 +7046,11 @@ [(const_int 0)] { rtx lab = PATTERN (gen_call_site ()); - rtx call_insn; - - operands[3] = gen_rtx_REG (SImode, R1_REG); + rtx tmp = gen_rtx_REG (SImode, R1_REG); - sh_expand_sym_label2reg (operands[3], operands[1], lab, true); - call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0], - operands[3], + sh_expand_sym_label2reg (tmp, operands[1], lab, true); + rtx call_insn = emit_call_insn (gen_sibcall_valuei_pcrel (operands[0], + tmp, operands[2], copy_rtx (lab))); SIBLING_CALL_P (call_insn) = 1; @@ -7078,12 +7078,11 @@ [(const_int 0)] { rtx lab = PATTERN (gen_call_site ()); + rtx tmp = gen_rtx_REG (SImode, R1_REG); - operands[3] = gen_rtx_REG (SImode, R1_REG); - - sh_expand_sym_label2reg (operands[3], operands[1], lab, true); + sh_expand_sym_label2reg (tmp, operands[1], lab, true); rtx i = emit_call_insn (gen_sibcall_valuei_pcrel_fdpic (operands[0], - operands[3], + tmp, operands[2], copy_rtx (lab))); SIBLING_CALL_P (i) = 1; diff --git a/gcc/config/sol2.h b/gcc/config/sol2.h index bc53a1ca284..d522de03f4a 100644 --- a/gcc/config/sol2.h +++ b/gcc/config/sol2.h @@ -205,8 +205,8 @@ along with GCC; see the file COPYING3. If not see /* We don't use the standard svr4 STARTFILE_SPEC because it's wrong for us. */ #undef STARTFILE_SPEC #ifdef HAVE_SOLARIS_CRTS -/* Since Solaris 11.x and Solaris 12, the OS delivers crt1.o, crti.o, and - crtn.o, with a hook for compiler-dependent stuff like profile handling. */ +/* Since Solaris 11.4, the OS delivers crt1.o, crti.o, and crtn.o, with a hook + for compiler-dependent stuff like profile handling. */ #define STARTFILE_SPEC "%{!shared:%{!symbolic: \ crt1.o%s \ %{p:%e-p is not supported; \ diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 83ca1dcc617..b9c8dcc57c1 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -914,6 +914,80 @@ mem_ref (rtx x) return NULL_RTX; } +/* True if any of INSN's source register(s) is REG. */ + +static bool +insn_uses_reg_p (rtx_insn *insn, unsigned int reg) +{ + extract_insn (insn); + return ((REG_P (recog_data.operand[1]) + && REGNO (recog_data.operand[1]) == reg) + || (recog_data.n_operands == 3 + && REG_P (recog_data.operand[2]) + && REGNO (recog_data.operand[2]) == reg)); +} + +/* True if INSN is a floating-point division or square-root. */ + +static bool +div_sqrt_insn_p (rtx_insn *insn) +{ + if (GET_CODE (PATTERN (insn)) != SET) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_FPDIVS: + case TYPE_FPSQRTS: + case TYPE_FPDIVD: + case TYPE_FPSQRTD: + return true; + default: + return false; + } +} + +/* True if INSN is a floating-point instruction. */ + +static bool +fpop_insn_p (rtx_insn *insn) +{ + if (GET_CODE (PATTERN (insn)) != SET) + return false; + + switch (get_attr_type (insn)) + { + case TYPE_FPMOVE: + case TYPE_FPCMOVE: + case TYPE_FP: + case TYPE_FPCMP: + case TYPE_FPMUL: + case TYPE_FPDIVS: + case TYPE_FPSQRTS: + case TYPE_FPDIVD: + case TYPE_FPSQRTD: + return true; + default: + return false; + } +} + +/* True if INSN is an atomic instruction. */ + +static bool +atomic_insn_for_leon3_p (rtx_insn *insn) +{ + switch (INSN_CODE (insn)) + { + case CODE_FOR_swapsi: + case CODE_FOR_ldstub: + case CODE_FOR_atomic_compare_and_swap_leon3_1: + return true; + default: + return false; + } +} + /* We use a machine specific pass to enable workarounds for errata. We need to have the (essentially) final form of the insn stream in order @@ -939,11 +1013,134 @@ sparc_do_work_around_errata (void) { bool insert_nop = false; rtx set; + rtx_insn *jump; + rtx_sequence *seq; /* Look into the instruction in a delay slot. */ - if (NONJUMP_INSN_P (insn)) - if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn))) + if (NONJUMP_INSN_P (insn) + && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))) + { + jump = seq->insn (0); insn = seq->insn (1); + } + else if (JUMP_P (insn)) + jump = insn; + else + jump = NULL; + + /* Place a NOP at the branch target of an integer branch if it is a + floating-point operation or a floating-point branch. */ + if (sparc_fix_gr712rc + && jump + && jump_to_label_p (jump) + && get_attr_branch_type (jump) == BRANCH_TYPE_ICC) + { + rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); + if (target + && (fpop_insn_p (target) + || (JUMP_P (target) + && get_attr_branch_type (target) == BRANCH_TYPE_FCC))) + emit_insn_before (gen_nop (), target); + } + + /* Insert a NOP between load instruction and atomic instruction. Insert + a NOP at branch target if there is a load in delay slot and an atomic + instruction at branch target. */ + if (sparc_fix_ut700 + && NONJUMP_INSN_P (insn) + && (set = single_set (insn)) != NULL_RTX + && mem_ref (SET_SRC (set)) + && REG_P (SET_DEST (set))) + { + if (jump && jump_to_label_p (jump)) + { + rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); + if (target && atomic_insn_for_leon3_p (target)) + emit_insn_before (gen_nop (), target); + } + + next = next_active_insn (insn); + if (!next) + break; + + if (atomic_insn_for_leon3_p (next)) + insert_nop = true; + } + + /* Look for a sequence that starts with a fdiv or fsqrt instruction and + ends with another fdiv or fsqrt instruction with no dependencies on + the former, along with an appropriate pattern in between. */ + if (sparc_fix_lost_divsqrt + && NONJUMP_INSN_P (insn) + && div_sqrt_insn_p (insn)) + { + int i; + int fp_found = 0; + rtx_insn *after; + + const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn))); + + next = next_active_insn (insn); + if (!next) + break; + + for (after = next, i = 0; i < 4; i++) + { + /* Count floating-point operations. */ + if (i != 3 && fpop_insn_p (after)) + { + /* If the insn uses the destination register of + the div/sqrt, then it cannot be problematic. */ + if (insn_uses_reg_p (after, dest_reg)) + break; + fp_found++; + } + + /* Count floating-point loads. */ + if (i != 3 + && (set = single_set (after)) != NULL_RTX + && REG_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) > 31) + { + /* If the insn uses the destination register of + the div/sqrt, then it cannot be problematic. */ + if (REGNO (SET_DEST (set)) == dest_reg) + break; + fp_found++; + } + + /* Check if this is a problematic sequence. */ + if (i > 1 + && fp_found >= 2 + && div_sqrt_insn_p (after)) + { + /* If this is the short version of the problematic + sequence we add two NOPs in a row to also prevent + the long version. */ + if (i == 2) + emit_insn_before (gen_nop (), next); + insert_nop = true; + break; + } + + /* No need to scan past a second div/sqrt. */ + if (div_sqrt_insn_p (after)) + break; + + /* Insert NOP before branch. */ + if (i < 3 + && (!NONJUMP_INSN_P (after) + || GET_CODE (PATTERN (after)) == SEQUENCE)) + { + insert_nop = true; + break; + } + + after = next_active_insn (after); + if (!after) + break; + } + } /* Look for either of these two sequences: @@ -1003,8 +1200,8 @@ sparc_do_work_around_errata (void) then the sequence cannot be problematic. */ if (i == 0) { - if (((set = single_set (after)) != NULL_RTX) - && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set)))) + if ((set = single_set (after)) != NULL_RTX + && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set)))) break; after = next_active_insn (after); @@ -1014,21 +1211,21 @@ sparc_do_work_around_errata (void) /* Add NOP if third instruction is a store. */ if (i == 1 - && ((set = single_set (after)) != NULL_RTX) + && (set = single_set (after)) != NULL_RTX && MEM_P (SET_DEST (set))) insert_nop = true; } } - else + /* Look for a single-word load into an odd-numbered FP register. */ - if (sparc_fix_at697f - && NONJUMP_INSN_P (insn) - && (set = single_set (insn)) != NULL_RTX - && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 - && MEM_P (SET_SRC (set)) - && REG_P (SET_DEST (set)) - && REGNO (SET_DEST (set)) > 31 - && REGNO (SET_DEST (set)) % 2 != 0) + else if (sparc_fix_at697f + && NONJUMP_INSN_P (insn) + && (set = single_set (insn)) != NULL_RTX + && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 + && mem_ref (SET_SRC (set)) + && REG_P (SET_DEST (set)) + && REGNO (SET_DEST (set)) > 31 + && REGNO (SET_DEST (set)) % 2 != 0) { /* The wrong dependency is on the enclosing double register. */ const unsigned int x = REGNO (SET_DEST (set)) - 1; @@ -1095,7 +1292,8 @@ sparc_do_work_around_errata (void) && NONJUMP_INSN_P (insn) && (set = single_set (insn)) != NULL_RTX && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4 - && mem_ref (SET_SRC (set)) != NULL_RTX + && (mem_ref (SET_SRC (set)) != NULL_RTX + || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op) && REG_P (SET_DEST (set)) && REGNO (SET_DEST (set)) < 32) { @@ -1133,6 +1331,11 @@ sparc_do_work_around_errata (void) && REGNO (src) != REGNO (x))) && !reg_mentioned_p (x, XEXP (dest, 0))) insert_nop = true; + + /* GOT accesses uses LD. */ + else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op + && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1))) + insert_nop = true; } } @@ -1272,7 +1475,8 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst; + return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst + || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt; } virtual unsigned int execute (function *) @@ -1642,9 +1846,12 @@ sparc_option_override (void) if (!(target_flags_explicit & MASK_LRA)) target_flags |= MASK_LRA; - /* Enable the back-to-back store errata workaround for LEON3FT. */ + /* Enable applicable errata workarounds for LEON3FT. */ if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc) + { sparc_fix_b2bst = 1; + sparc_fix_lost_divsqrt = 1; + } /* Disable FsMULd for the UT699 since it doesn't work correctly. */ if (sparc_fix_ut699) diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 321c0d8b9fe..4ddbe56fbf4 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -430,6 +430,14 @@ (symbol_ref "(sparc_fix_b2bst != 0 ? FIX_B2BST_TRUE : FIX_B2BST_FALSE)")) +(define_attr "fix_lost_divsqrt" "false,true" + (symbol_ref "(sparc_fix_lost_divsqrt != 0 + ? FIX_LOST_DIVSQRT_TRUE : FIX_LOST_DIVSQRT_FALSE)")) + +(define_attr "fix_gr712rc" "false,true" + (symbol_ref "(sparc_fix_gr712rc != 0 + ? FIX_GR712RC_TRUE : FIX_GR712RC_FALSE)")) + ;; Length (in # of insns). ;; Beware that setting a length greater or equal to 3 for conditional branches ;; has a side-effect (see output_cbranch and output_v9branch). @@ -577,6 +585,9 @@ (define_attr "in_branch_delay" "false,true" (cond [(eq_attr "type" "uncond_branch,branch,cbcond,uncond_cbcond,call,sibcall,call_no_delay_slot,multi") (const_string "false") + (and (eq_attr "fix_lost_divsqrt" "true") + (eq_attr "type" "fpdivs,fpsqrts,fpdivd,fpsqrtd")) + (const_string "false") (and (eq_attr "fix_b2bst" "true") (eq_attr "type" "store,fpstore")) (const_string "false") (and (eq_attr "fix_ut699" "true") (eq_attr "type" "load,sload")) @@ -590,6 +601,15 @@ (const_string "true") ] (const_string "false"))) +(define_attr "in_integer_branch_annul_delay" "false,true" + (cond [(and (eq_attr "fix_gr712rc" "true") + (eq_attr "type" "fp,fpcmp,fpmove,fpcmove,fpmul, + fpdivs,fpsqrts,fpdivd,fpsqrtd")) + (const_string "false") + (eq_attr "in_branch_delay" "true") + (const_string "true") + ] (const_string "false"))) + (define_delay (eq_attr "type" "call") [(eq_attr "in_call_delay" "true") (nil) (nil)]) @@ -599,9 +619,15 @@ (define_delay (eq_attr "type" "return") [(eq_attr "in_return_delay" "true") (nil) (nil)]) -(define_delay (eq_attr "type" "branch") +(define_delay (and (eq_attr "type" "branch") + (not (eq_attr "branch_type" "icc"))) [(eq_attr "in_branch_delay" "true") (nil) (eq_attr "in_branch_delay" "true")]) +(define_delay (and (eq_attr "type" "branch") + (eq_attr "branch_type" "icc")) + [(eq_attr "in_branch_delay" "true") (nil) + (eq_attr "in_integer_branch_annul_delay" "true")]) + (define_delay (eq_attr "type" "uncond_branch") [(eq_attr "in_branch_delay" "true") (nil) (nil)]) diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt index 22267f50e90..71ead75831d 100644 --- a/gcc/config/sparc/sparc.opt +++ b/gcc/config/sparc/sparc.opt @@ -253,6 +253,10 @@ Enable workarounds for the errata of the GR712RC processor. TargetVariable unsigned int sparc_fix_b2bst +;; Enable workaround for GRLIB-TN-0013 errata +TargetVariable +unsigned int sparc_fix_lost_divsqrt + Mask(LONG_DOUBLE_128) ;; Use 128-bit long double diff --git a/gcc/config/sparc/sync.md b/gcc/config/sparc/sync.md index 1593bdeb903..43c66e96ba3 100644 --- a/gcc/config/sparc/sync.md +++ b/gcc/config/sparc/sync.md @@ -212,7 +212,7 @@ "cas<modesuffix>\t%1, %2, %0" [(set_attr "type" "multi")]) -(define_insn "*atomic_compare_and_swap_leon3_1" +(define_insn "atomic_compare_and_swap_leon3_1" [(set (match_operand:SI 0 "register_operand" "=r") (match_operand:SI 1 "mem_noofs_operand" "+w")) (set (match_dup 1) @@ -222,12 +222,16 @@ UNSPECV_CAS))] "TARGET_LEON3" { + if (sparc_fix_gr712rc) + output_asm_insn (".align\t16", operands); if (TARGET_SV_MODE) return "casa\t%1 0xb, %2, %0"; /* ASI for supervisor data space. */ else return "casa\t%1 0xa, %2, %0"; /* ASI for user data space. */ } - [(set_attr "type" "multi")]) + [(set_attr "type" "multi") + (set (attr "length") (if_then_else (eq_attr "fix_gr712rc" "true") + (const_int 4) (const_int 1)))]) (define_insn "*atomic_compare_and_swapdi_v8plus" [(set (match_operand:DI 0 "register_operand" "=h") @@ -275,8 +279,15 @@ (set (match_dup 1) (match_operand:SI 2 "register_operand" "0"))] "(TARGET_V8 || TARGET_V9) && !sparc_fix_ut699" - "swap\t%1, %0" - [(set_attr "type" "multi")]) +{ + if (sparc_fix_gr712rc) + return ".align\t16\n\tswap\t%1, %0"; + else + return "swap\t%1, %0"; +} + [(set_attr "type" "multi") + (set (attr "length") (if_then_else (eq_attr "fix_gr712rc" "true") + (const_int 4) (const_int 1)))]) (define_expand "atomic_test_and_set" [(match_operand:QI 0 "register_operand" "") @@ -307,5 +318,12 @@ UNSPECV_LDSTUB)) (set (match_dup 1) (const_int -1))] "!sparc_fix_ut699" - "ldstub\t%1, %0" - [(set_attr "type" "multi")]) +{ + if (sparc_fix_gr712rc) + return ".align\t16\n\tldstub\t%1, %0"; + else + return "ldstub\t%1, %0"; +} + [(set_attr "type" "multi") + (set (attr "length") (if_then_else (eq_attr "fix_gr712rc" "true") + (const_int 4) (const_int 1)))]) diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h index 38901b7530a..dac5657948e 100644 --- a/gcc/config/xtensa/xtensa-protos.h +++ b/gcc/config/xtensa/xtensa-protos.h @@ -73,5 +73,6 @@ extern void xtensa_expand_prologue (void); extern void xtensa_expand_epilogue (void); extern void order_regs_for_local_alloc (void); extern enum reg_class xtensa_regno_to_class (int regno); +extern HOST_WIDE_INT xtensa_initial_elimination_offset (int from, int to); #endif /* !__XTENSA_PROTOS_H__ */ diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c index 8c404187107..9bdf870a0fd 100644 --- a/gcc/config/xtensa/xtensa.c +++ b/gcc/config/xtensa/xtensa.c @@ -176,6 +176,7 @@ static bool xtensa_member_type_forces_blk (const_tree, machine_mode mode); static void xtensa_conditional_register_usage (void); +static unsigned HOST_WIDE_INT xtensa_asan_shadow_offset (void); @@ -304,6 +305,9 @@ static void xtensa_conditional_register_usage (void); #undef TARGET_CONDITIONAL_REGISTER_USAGE #define TARGET_CONDITIONAL_REGISTER_USAGE xtensa_conditional_register_usage +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET xtensa_asan_shadow_offset + struct gcc_target targetm = TARGET_INITIALIZER; @@ -2678,6 +2682,30 @@ xtensa_frame_pointer_required (void) return false; } +HOST_WIDE_INT +xtensa_initial_elimination_offset (int from, int to) +{ + long frame_size = compute_frame_size (get_frame_size ()); + HOST_WIDE_INT offset; + + switch (from) + { + case FRAME_POINTER_REGNUM: + if (FRAME_GROWS_DOWNWARD) + offset = frame_size - (WINDOW_SIZE * UNITS_PER_WORD) + - cfun->machine->callee_save_size; + else + offset = 0; + break; + case ARG_POINTER_REGNUM: + offset = frame_size; + break; + default: + gcc_unreachable (); + } + + return offset; +} /* minimum frame = reg save area (4 words) plus static chain (1 word) and the total number of words must be a multiple of 128 bits. */ @@ -4313,4 +4341,12 @@ enum reg_class xtensa_regno_to_class (int regno) return regno_to_class[regno]; } +/* Implement TARGET_ASAN_SHADOW_OFFSET. */ + +static unsigned HOST_WIDE_INT +xtensa_asan_shadow_offset (void) +{ + return HOST_WIDE_INT_UC (0x10000000); +} + #include "gt-xtensa.h" diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h index c32e8caf6dd..ec9ee59b092 100644 --- a/gcc/config/xtensa/xtensa.h +++ b/gcc/config/xtensa/xtensa.h @@ -460,9 +460,12 @@ enum reg_class #define STACK_GROWS_DOWNWARD 1 +#define FRAME_GROWS_DOWNWARD (flag_stack_protect \ + || (flag_sanitize & SANITIZE_ADDRESS) != 0) + /* Offset within stack frame to start allocating local variables at. */ #define STARTING_FRAME_OFFSET \ - crtl->outgoing_args_size + (FRAME_GROWS_DOWNWARD ? 0 : crtl->outgoing_args_size) /* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so they are eliminated to either the stack pointer or hard frame pointer. */ @@ -474,20 +477,7 @@ enum reg_class /* Specify the initial difference between the specified pair of registers. */ #define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ - do { \ - long frame_size = compute_frame_size (get_frame_size ()); \ - switch (FROM) \ - { \ - case FRAME_POINTER_REGNUM: \ - (OFFSET) = 0; \ - break; \ - case ARG_POINTER_REGNUM: \ - (OFFSET) = frame_size; \ - break; \ - default: \ - gcc_unreachable (); \ - } \ - } while (0) + (OFFSET) = xtensa_initial_elimination_offset ((FROM), (TO)) /* If defined, the maximum amount of space required for outgoing arguments will be computed and placed into the variable |