diff options
Diffstat (limited to 'gcc/config/i386')
-rw-r--r-- | gcc/config/i386/i386.c | 157 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 58 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 12 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 2 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 4 | ||||
-rw-r--r-- | gcc/config/i386/xm-mingw32.h | 5 |
6 files changed, 132 insertions, 106 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index cd9153efe57..2af4e9a2859 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2444,53 +2444,6 @@ struct GTY(()) stack_local_entry { struct stack_local_entry *next; }; -/* Structure describing stack frame layout. - Stack grows downward: - - [arguments] - <- ARG_POINTER - saved pc - - saved static chain if ix86_static_chain_on_stack - - saved frame pointer if frame_pointer_needed - <- HARD_FRAME_POINTER - [saved regs] - <- regs_save_offset - [padding0] - - [saved SSE regs] - <- sse_regs_save_offset - [padding1] | - | <- FRAME_POINTER - [va_arg registers] | - | - [frame] | - | - [padding2] | = to_allocate - <- STACK_POINTER - */ -struct ix86_frame -{ - int nsseregs; - int nregs; - int va_arg_size; - int red_zone_size; - int outgoing_arguments_size; - - /* The offsets relative to ARG_POINTER. */ - HOST_WIDE_INT frame_pointer_offset; - HOST_WIDE_INT hard_frame_pointer_offset; - HOST_WIDE_INT stack_pointer_offset; - HOST_WIDE_INT hfp_save_offset; - HOST_WIDE_INT reg_save_offset; - HOST_WIDE_INT sse_reg_save_offset; - - /* When save_regs_using_mov is set, emit prologue using - move instead of push instructions. */ - bool save_regs_using_mov; -}; - /* Which cpu are we scheduling for. */ enum attr_cpu ix86_schedule; @@ -2582,7 +2535,7 @@ static unsigned int ix86_function_arg_boundary (machine_mode, const_tree); static rtx ix86_static_chain (const_tree, bool); static int ix86_function_regparm (const_tree, const_tree); -static void ix86_compute_frame_layout (struct ix86_frame *); +static void ix86_compute_frame_layout (void); static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode, rtx, rtx, int); static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT); @@ -2686,7 +2639,7 @@ rest_of_handle_insert_vzeroupper (void) int i; /* vzeroupper instructions are inserted immediately after reload to - account for possible spills from 256bit registers. The pass + account for possible spills from 256bit or 512bit registers. The pass reuses mode switching infrastructure by re-running mode insertion pass, so disable entities that have already been processed. */ for (i = 0; i < MAX_386_ENTITIES; i++) @@ -4185,7 +4138,7 @@ public: /* opt_pass methods: */ virtual bool gate (function *) { - return TARGET_AVX && !TARGET_AVX512F + return TARGET_AVX && TARGET_VZEROUPPER && flag_expensive_optimizations && !optimize_size; } @@ -6203,7 +6156,8 @@ ix86_option_override_internal (bool main_args_p, #endif } - if (!(opts_set->x_target_flags & MASK_VZEROUPPER)) + if (!(opts_set->x_target_flags & MASK_VZEROUPPER) + && TARGET_EMIT_VZEROUPPER) opts->x_target_flags |= MASK_VZEROUPPER; if (!(opts_set->x_target_flags & MASK_STV)) opts->x_target_flags |= MASK_STV; @@ -11889,8 +11843,6 @@ symbolic_reference_mentioned_p (rtx op) bool ix86_can_use_return_insn_p (void) { - struct ix86_frame frame; - /* Don't use `ret' instruction in interrupt handler. */ if (! reload_completed || frame_pointer_needed @@ -11902,7 +11854,8 @@ ix86_can_use_return_insn_p (void) if (crtl->args.pops_args && crtl->args.size >= 32768) return 0; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; return (frame.stack_pointer_offset == UNITS_PER_WORD && (frame.nregs + frame.nsseregs) == 0); } @@ -12388,8 +12341,8 @@ ix86_can_eliminate (const int from, const int to) HOST_WIDE_INT ix86_initial_elimination_offset (int from, int to) { - struct ix86_frame frame; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) return frame.hard_frame_pointer_offset; @@ -12428,8 +12381,9 @@ ix86_builtin_setjmp_frame_value (void) /* Fill structure ix86_frame about frame of currently computed function. */ static void -ix86_compute_frame_layout (struct ix86_frame *frame) +ix86_compute_frame_layout (void) { + struct ix86_frame *frame = &cfun->machine->frame; unsigned HOST_WIDE_INT stack_alignment_needed; HOST_WIDE_INT offset; unsigned HOST_WIDE_INT preferred_alignment; @@ -13713,7 +13667,6 @@ ix86_expand_prologue (void) { struct machine_function *m = cfun->machine; rtx insn, t; - struct ix86_frame frame; HOST_WIDE_INT allocate; bool int_registers_saved; bool sse_registers_saved; @@ -13736,7 +13689,8 @@ ix86_expand_prologue (void) m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET; m->fs.sp_valid = true; - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl)) { @@ -14399,12 +14353,12 @@ ix86_expand_epilogue (int style) { struct machine_function *m = cfun->machine; struct machine_frame_state frame_state_save = m->fs; - struct ix86_frame frame; bool restore_regs_via_mov; bool using_drap; ix86_finalize_stack_realign_flags (); - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; m->fs.sp_valid = (!frame_pointer_needed || (crtl->sp_is_unchanging @@ -14902,7 +14856,6 @@ static GTY(()) rtx split_stack_fn_large; void ix86_expand_split_stack_prologue (void) { - struct ix86_frame frame; HOST_WIDE_INT allocate; unsigned HOST_WIDE_INT args_size; rtx_code_label *label; @@ -14914,7 +14867,8 @@ ix86_expand_split_stack_prologue (void) gcc_assert (flag_split_stack && reload_completed); ix86_finalize_stack_realign_flags (); - ix86_compute_frame_layout (&frame); + ix86_compute_frame_layout (); + struct ix86_frame &frame = cfun->machine->frame; allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET; /* This is the label we will branch to if we have enough stack @@ -19147,16 +19101,17 @@ ix86_dirflag_mode_needed (rtx_insn *insn) return X86_DIRFLAG_ANY; } -/* Check if a 256bit AVX register is referenced inside of EXP. */ +/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */ static bool -ix86_check_avx256_register (const_rtx exp) +ix86_check_avx_upper_register (const_rtx exp) { if (SUBREG_P (exp)) exp = SUBREG_REG (exp); return (REG_P (exp) - && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))); + && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)) + || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp)))); } /* Return needed mode for entity in optimize_mode_switching pass. */ @@ -19169,7 +19124,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) rtx link; /* Needed mode is set to AVX_U128_CLEAN if there are - no 256bit modes used in function arguments. */ + no 256bit or 512bit modes used in function arguments. */ for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) @@ -19178,7 +19133,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) { rtx arg = XEXP (XEXP (link, 0), 0); - if (ix86_check_avx256_register (arg)) + if (ix86_check_avx_upper_register (arg)) return AVX_U128_DIRTY; } } @@ -19186,13 +19141,13 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } - /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware - changes state only when a 256bit register is written to, but we need - to prevent the compiler from moving optimal insertion point above - eventual read from 256bit register. */ + /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced. + Hardware changes state only when a 256bit register is written to, + but we need to prevent the compiler from moving optimal insertion + point above eventual read from 256bit or 512 bit register. */ subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (ix86_check_avx256_register (*iter)) + if (ix86_check_avx_upper_register (*iter)) return AVX_U128_DIRTY; return AVX_U128_ANY; @@ -19274,12 +19229,12 @@ ix86_mode_needed (int entity, rtx_insn *insn) return 0; } -/* Check if a 256bit AVX register is referenced in stores. */ +/* Check if a 256bit or 512bit AVX register is referenced in stores. */ static void -ix86_check_avx256_stores (rtx dest, const_rtx, void *data) +ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx256_register (dest)) + if (ix86_check_avx_upper_register (dest)) { bool *used = (bool *) data; *used = true; @@ -19298,18 +19253,18 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn) return AVX_U128_CLEAN; /* We know that state is clean after CALL insn if there are no - 256bit registers used in the function return register. */ + 256bit or 512bit registers used in the function return register. */ if (CALL_P (insn)) { - bool avx_reg256_found = false; - note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found); + bool avx_upper_reg_found = false; + note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found); - return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; + return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN; } /* Otherwise, return current mode. Remember that if insn - references AVX 256bit registers, the mode was already changed - to DIRTY from MODE_NEEDED. */ + references AVX 256bit or 512bit registers, the mode was already + changed to DIRTY from MODE_NEEDED. */ return mode; } @@ -19352,13 +19307,13 @@ ix86_avx_u128_mode_entry (void) tree arg; /* Entry mode is set to AVX_U128_DIRTY if there are - 256bit modes used in function arguments. */ + 256bit or 512bit modes used in function arguments. */ for (arg = DECL_ARGUMENTS (current_function_decl); arg; arg = TREE_CHAIN (arg)) { rtx incoming = DECL_INCOMING_RTL (arg); - if (incoming && ix86_check_avx256_register (incoming)) + if (incoming && ix86_check_avx_upper_register (incoming)) return AVX_U128_DIRTY; } @@ -19392,9 +19347,9 @@ ix86_avx_u128_mode_exit (void) { rtx reg = crtl->return_rtx; - /* Exit mode is set to AVX_U128_DIRTY if there are - 256bit modes used in the function return register. */ - if (reg && ix86_check_avx256_register (reg)) + /* Exit mode is set to AVX_U128_DIRTY if there are 256bit + or 512 bit modes used in the function return register. */ + if (reg && ix86_check_avx_upper_register (reg)) return AVX_U128_DIRTY; return AVX_U128_CLEAN; @@ -41878,7 +41833,8 @@ static void x86_print_call_or_nop (FILE *file, const char *target) { if (flag_nop_mcount) - fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */ + /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */ + fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n"); else fprintf (file, "1:\tcall\t%s\n", target); } @@ -45305,8 +45261,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) emit_move_insn (op0, ireg); } -/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the - result in OPERAND0. */ +/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */ void ix86_expand_rint (rtx operand0, rtx operand1) { @@ -45314,11 +45269,17 @@ ix86_expand_rint (rtx operand0, rtx operand1) xa = fabs (operand1); if (!isless (xa, 2**52)) return operand1; - xa = xa + 2**52 - 2**52; + two52 = 2**52; + if (flag_rounding_math) + { + two52 = copysign (two52, operand1); + xa = operand1; + } + xa = xa + two52 - two52; return copysign (xa, operand1); */ machine_mode mode = GET_MODE (operand0); - rtx res, xa, TWO52, mask; + rtx res, xa, TWO52, two52, mask; rtx_code_label *label; res = gen_reg_rtx (mode); @@ -45331,8 +45292,16 @@ ix86_expand_rint (rtx operand0, rtx operand1) TWO52 = ix86_gen_TWO52 (mode); label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); - xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); - xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); + two52 = TWO52; + if (flag_rounding_math) + { + two52 = gen_reg_rtx (mode); + ix86_sse_copysign_to_positive (two52, TWO52, res, mask); + xa = res; + } + + xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT); + xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT); ix86_sse_copysign_to_positive (res, xa, res, mask); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 9e5f4d857d9..f9b91286a01 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -508,6 +508,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] #define TARGET_ONE_IF_CONV_INSN \ ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN] +#define TARGET_EMIT_VZEROUPPER \ + ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { @@ -1121,6 +1123,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \ || (MODE) == V4TImode) +#define VALID_AVX512F_REG_OR_XI_MODE(MODE) \ + (VALID_AVX512F_REG_MODE (MODE) || (MODE) == XImode) + #define VALID_AVX512VL_128_REG_MODE(MODE) \ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \ @@ -2446,9 +2451,56 @@ enum avx_u128_state #define FASTCALL_PREFIX '@' +#ifndef USED_FOR_TARGET +/* Structure describing stack frame layout. + Stack grows downward: + + [arguments] + <- ARG_POINTER + saved pc + + saved static chain if ix86_static_chain_on_stack + + saved frame pointer if frame_pointer_needed + <- HARD_FRAME_POINTER + [saved regs] + <- regs_save_offset + [padding0] + + [saved SSE regs] + <- sse_regs_save_offset + [padding1] | + | <- FRAME_POINTER + [va_arg registers] | + | + [frame] | + | + [padding2] | = to_allocate + <- STACK_POINTER + */ +struct GTY(()) ix86_frame +{ + int nsseregs; + int nregs; + int va_arg_size; + int red_zone_size; + int outgoing_arguments_size; + + /* The offsets relative to ARG_POINTER. */ + HOST_WIDE_INT frame_pointer_offset; + HOST_WIDE_INT hard_frame_pointer_offset; + HOST_WIDE_INT stack_pointer_offset; + HOST_WIDE_INT hfp_save_offset; + HOST_WIDE_INT reg_save_offset; + HOST_WIDE_INT sse_reg_save_offset; + + /* When save_regs_using_mov is set, emit prologue using + move instead of push instructions. */ + bool save_regs_using_mov; +}; + /* Machine specific frame tracking during prologue/epilogue generation. */ -#ifndef USED_FOR_TARGET struct GTY(()) machine_frame_state { /* This pair tracks the currently active CFA as reg+offset. When reg @@ -2507,6 +2559,9 @@ struct GTY(()) machine_function { int varargs_fpr_size; int optimize_mode_switching[MAX_386_ENTITIES]; + /* Cached initial frame layout for the current function. */ + struct ix86_frame frame; + /* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE has been computed for. */ int use_fast_prologue_epilogue_nregs; @@ -2589,6 +2644,7 @@ struct GTY(()) machine_function { #define ix86_current_function_calls_tls_descriptor \ (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG)) #define ix86_static_chain_on_stack (cfun->machine->static_chain_on_stack) +#define ix86_red_zone_size (cfun->machine->frame.red_zone_size) /* Control behavior of x86_file_start. */ #define X86_FILE_START_VERSION_DIRECTIVE false diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0281bb5f06c..dbe88f40c8f 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9641,7 +9641,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands) @@ -9655,7 +9655,8 @@ (ashift:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn "*bmi2_ashl<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r") @@ -10141,7 +10142,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c,r") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) @@ -10155,7 +10156,8 @@ (any_shiftrt:SWI48 (match_dup 1) (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] - "operands[2] = gen_lowpart (QImode, operands[2]);") + "operands[2] = gen_lowpart (QImode, operands[2]);" + [(set_attr "isa" "*,bmi2")]) (define_insn_and_split "*<shift_insn><mode>3_doubleword" [(set (match_operand:DWI 0 "register_operand" "=&r") @@ -10600,7 +10602,7 @@ (match_operand:SWI48 1 "nonimmediate_operand") (subreg:QI (and:SI - (match_operand:SI 2 "register_operand") + (match_operand:SI 2 "register_operand" "c") (match_operand:SI 3 "const_int_operand")) 0))) (clobber (reg:CC FLAGS_REG))] "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a95f529d6a9..80cda39bacf 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -13852,7 +13852,7 @@ (match_operand:DI 1 "nonimmediate_operand" " 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v") (match_operand:DI 2 "vector_move_operand" - "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))] + " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))] "TARGET_SSE" "@ pinsrq\t{$1, %2, %0|%0, %2, 1} diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index c642f452e00..3b2701bb5f2 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -551,3 +551,7 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) if-converted sequence to one. */ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC) + +/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion + before a transfer of control flow out of the function. */ +DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL) diff --git a/gcc/config/i386/xm-mingw32.h b/gcc/config/i386/xm-mingw32.h index 6eb3a626f0c..d4d63f58322 100644 --- a/gcc/config/i386/xm-mingw32.h +++ b/gcc/config/i386/xm-mingw32.h @@ -37,8 +37,3 @@ along with GCC; see the file COPYING3. If not see "long long" values. Instead, we use "I64". */ #define HOST_LONG_LONG_FORMAT "I64" #endif - -/* this is to prevent gcc-heap.c from assuming sizeof(long) == sizeof(intptr_t) */ -#ifdef __x86_64__ -# define HOST_BITS_PER_PTR 64 -#endif |