aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/i386
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--gcc/config/i386/i386.c157
-rw-r--r--gcc/config/i386/i386.h58
-rw-r--r--gcc/config/i386/i386.md12
-rw-r--r--gcc/config/i386/sse.md2
-rw-r--r--gcc/config/i386/x86-tune.def4
-rw-r--r--gcc/config/i386/xm-mingw32.h5
6 files changed, 132 insertions, 106 deletions
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cd9153efe57..2af4e9a2859 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2444,53 +2444,6 @@ struct GTY(()) stack_local_entry {
struct stack_local_entry *next;
};
-/* Structure describing stack frame layout.
- Stack grows downward:
-
- [arguments]
- <- ARG_POINTER
- saved pc
-
- saved static chain if ix86_static_chain_on_stack
-
- saved frame pointer if frame_pointer_needed
- <- HARD_FRAME_POINTER
- [saved regs]
- <- regs_save_offset
- [padding0]
-
- [saved SSE regs]
- <- sse_regs_save_offset
- [padding1] |
- | <- FRAME_POINTER
- [va_arg registers] |
- |
- [frame] |
- |
- [padding2] | = to_allocate
- <- STACK_POINTER
- */
-struct ix86_frame
-{
- int nsseregs;
- int nregs;
- int va_arg_size;
- int red_zone_size;
- int outgoing_arguments_size;
-
- /* The offsets relative to ARG_POINTER. */
- HOST_WIDE_INT frame_pointer_offset;
- HOST_WIDE_INT hard_frame_pointer_offset;
- HOST_WIDE_INT stack_pointer_offset;
- HOST_WIDE_INT hfp_save_offset;
- HOST_WIDE_INT reg_save_offset;
- HOST_WIDE_INT sse_reg_save_offset;
-
- /* When save_regs_using_mov is set, emit prologue using
- move instead of push instructions. */
- bool save_regs_using_mov;
-};
-
/* Which cpu are we scheduling for. */
enum attr_cpu ix86_schedule;
@@ -2582,7 +2535,7 @@ static unsigned int ix86_function_arg_boundary (machine_mode,
const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
-static void ix86_compute_frame_layout (struct ix86_frame *);
+static void ix86_compute_frame_layout (void);
static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
rtx, rtx, int);
static void ix86_add_new_builtins (HOST_WIDE_INT, HOST_WIDE_INT);
@@ -2686,7 +2639,7 @@ rest_of_handle_insert_vzeroupper (void)
int i;
/* vzeroupper instructions are inserted immediately after reload to
- account for possible spills from 256bit registers. The pass
+ account for possible spills from 256bit or 512bit registers. The pass
reuses mode switching infrastructure by re-running mode insertion
pass, so disable entities that have already been processed. */
for (i = 0; i < MAX_386_ENTITIES; i++)
@@ -4185,7 +4138,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- return TARGET_AVX && !TARGET_AVX512F
+ return TARGET_AVX
&& TARGET_VZEROUPPER && flag_expensive_optimizations
&& !optimize_size;
}
@@ -6203,7 +6156,8 @@ ix86_option_override_internal (bool main_args_p,
#endif
}
- if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
+ if (!(opts_set->x_target_flags & MASK_VZEROUPPER)
+ && TARGET_EMIT_VZEROUPPER)
opts->x_target_flags |= MASK_VZEROUPPER;
if (!(opts_set->x_target_flags & MASK_STV))
opts->x_target_flags |= MASK_STV;
@@ -11889,8 +11843,6 @@ symbolic_reference_mentioned_p (rtx op)
bool
ix86_can_use_return_insn_p (void)
{
- struct ix86_frame frame;
-
/* Don't use `ret' instruction in interrupt handler. */
if (! reload_completed
|| frame_pointer_needed
@@ -11902,7 +11854,8 @@ ix86_can_use_return_insn_p (void)
if (crtl->args.pops_args && crtl->args.size >= 32768)
return 0;
- ix86_compute_frame_layout (&frame);
+ ix86_compute_frame_layout ();
+ struct ix86_frame &frame = cfun->machine->frame;
return (frame.stack_pointer_offset == UNITS_PER_WORD
&& (frame.nregs + frame.nsseregs) == 0);
}
@@ -12388,8 +12341,8 @@ ix86_can_eliminate (const int from, const int to)
HOST_WIDE_INT
ix86_initial_elimination_offset (int from, int to)
{
- struct ix86_frame frame;
- ix86_compute_frame_layout (&frame);
+ ix86_compute_frame_layout ();
+ struct ix86_frame &frame = cfun->machine->frame;
if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
return frame.hard_frame_pointer_offset;
@@ -12428,8 +12381,9 @@ ix86_builtin_setjmp_frame_value (void)
/* Fill structure ix86_frame about frame of currently computed function. */
static void
-ix86_compute_frame_layout (struct ix86_frame *frame)
+ix86_compute_frame_layout (void)
{
+ struct ix86_frame *frame = &cfun->machine->frame;
unsigned HOST_WIDE_INT stack_alignment_needed;
HOST_WIDE_INT offset;
unsigned HOST_WIDE_INT preferred_alignment;
@@ -13713,7 +13667,6 @@ ix86_expand_prologue (void)
{
struct machine_function *m = cfun->machine;
rtx insn, t;
- struct ix86_frame frame;
HOST_WIDE_INT allocate;
bool int_registers_saved;
bool sse_registers_saved;
@@ -13736,7 +13689,8 @@ ix86_expand_prologue (void)
m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
m->fs.sp_valid = true;
- ix86_compute_frame_layout (&frame);
+ ix86_compute_frame_layout ();
+ struct ix86_frame &frame = cfun->machine->frame;
if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
{
@@ -14399,12 +14353,12 @@ ix86_expand_epilogue (int style)
{
struct machine_function *m = cfun->machine;
struct machine_frame_state frame_state_save = m->fs;
- struct ix86_frame frame;
bool restore_regs_via_mov;
bool using_drap;
ix86_finalize_stack_realign_flags ();
- ix86_compute_frame_layout (&frame);
+ ix86_compute_frame_layout ();
+ struct ix86_frame &frame = cfun->machine->frame;
m->fs.sp_valid = (!frame_pointer_needed
|| (crtl->sp_is_unchanging
@@ -14902,7 +14856,6 @@ static GTY(()) rtx split_stack_fn_large;
void
ix86_expand_split_stack_prologue (void)
{
- struct ix86_frame frame;
HOST_WIDE_INT allocate;
unsigned HOST_WIDE_INT args_size;
rtx_code_label *label;
@@ -14914,7 +14867,8 @@ ix86_expand_split_stack_prologue (void)
gcc_assert (flag_split_stack && reload_completed);
ix86_finalize_stack_realign_flags ();
- ix86_compute_frame_layout (&frame);
+ ix86_compute_frame_layout ();
+ struct ix86_frame &frame = cfun->machine->frame;
allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
/* This is the label we will branch to if we have enough stack
@@ -19147,16 +19101,17 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
return X86_DIRFLAG_ANY;
}
-/* Check if a 256bit AVX register is referenced inside of EXP. */
+/* Check if a 256bit or 512 bit AVX register is referenced inside of EXP. */
static bool
-ix86_check_avx256_register (const_rtx exp)
+ix86_check_avx_upper_register (const_rtx exp)
{
if (SUBREG_P (exp))
exp = SUBREG_REG (exp);
return (REG_P (exp)
- && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
+ && (VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp))
+ || VALID_AVX512F_REG_OR_XI_MODE (GET_MODE (exp))));
}
/* Return needed mode for entity in optimize_mode_switching pass. */
@@ -19169,7 +19124,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
rtx link;
/* Needed mode is set to AVX_U128_CLEAN if there are
- no 256bit modes used in function arguments. */
+ no 256bit or 512bit modes used in function arguments. */
for (link = CALL_INSN_FUNCTION_USAGE (insn);
link;
link = XEXP (link, 1))
@@ -19178,7 +19133,7 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
{
rtx arg = XEXP (XEXP (link, 0), 0);
- if (ix86_check_avx256_register (arg))
+ if (ix86_check_avx_upper_register (arg))
return AVX_U128_DIRTY;
}
}
@@ -19186,13 +19141,13 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
return AVX_U128_CLEAN;
}
- /* Require DIRTY mode if a 256bit AVX register is referenced. Hardware
- changes state only when a 256bit register is written to, but we need
- to prevent the compiler from moving optimal insertion point above
- eventual read from 256bit register. */
+ /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
+ Hardware changes state only when a 256bit register is written to,
+ but we need to prevent the compiler from moving optimal insertion
+ point above eventual read from 256bit or 512 bit register. */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
- if (ix86_check_avx256_register (*iter))
+ if (ix86_check_avx_upper_register (*iter))
return AVX_U128_DIRTY;
return AVX_U128_ANY;
@@ -19274,12 +19229,12 @@ ix86_mode_needed (int entity, rtx_insn *insn)
return 0;
}
-/* Check if a 256bit AVX register is referenced in stores. */
+/* Check if a 256bit or 512bit AVX register is referenced in stores. */
static void
-ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
+ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
- if (ix86_check_avx256_register (dest))
+ if (ix86_check_avx_upper_register (dest))
{
bool *used = (bool *) data;
*used = true;
@@ -19298,18 +19253,18 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
return AVX_U128_CLEAN;
/* We know that state is clean after CALL insn if there are no
- 256bit registers used in the function return register. */
+ 256bit or 512bit registers used in the function return register. */
if (CALL_P (insn))
{
- bool avx_reg256_found = false;
- note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
+ bool avx_upper_reg_found = false;
+ note_stores (pat, ix86_check_avx_upper_stores, &avx_upper_reg_found);
- return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
+ return avx_upper_reg_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
}
/* Otherwise, return current mode. Remember that if insn
- references AVX 256bit registers, the mode was already changed
- to DIRTY from MODE_NEEDED. */
+ references AVX 256bit or 512bit registers, the mode was already
+ changed to DIRTY from MODE_NEEDED. */
return mode;
}
@@ -19352,13 +19307,13 @@ ix86_avx_u128_mode_entry (void)
tree arg;
/* Entry mode is set to AVX_U128_DIRTY if there are
- 256bit modes used in function arguments. */
+ 256bit or 512bit modes used in function arguments. */
for (arg = DECL_ARGUMENTS (current_function_decl); arg;
arg = TREE_CHAIN (arg))
{
rtx incoming = DECL_INCOMING_RTL (arg);
- if (incoming && ix86_check_avx256_register (incoming))
+ if (incoming && ix86_check_avx_upper_register (incoming))
return AVX_U128_DIRTY;
}
@@ -19392,9 +19347,9 @@ ix86_avx_u128_mode_exit (void)
{
rtx reg = crtl->return_rtx;
- /* Exit mode is set to AVX_U128_DIRTY if there are
- 256bit modes used in the function return register. */
- if (reg && ix86_check_avx256_register (reg))
+ /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
+ or 512 bit modes used in the function return register. */
+ if (reg && ix86_check_avx_upper_register (reg))
return AVX_U128_DIRTY;
return AVX_U128_CLEAN;
@@ -41878,7 +41833,8 @@ static void
x86_print_call_or_nop (FILE *file, const char *target)
{
if (flag_nop_mcount)
- fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop. */
+ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
+ fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
else
fprintf (file, "1:\tcall\t%s\n", target);
}
@@ -45305,8 +45261,7 @@ ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
emit_move_insn (op0, ireg);
}
-/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
- result in OPERAND0. */
+/* Expand rint rounding OPERAND1 and storing the result in OPERAND0. */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
@@ -45314,11 +45269,17 @@ ix86_expand_rint (rtx operand0, rtx operand1)
xa = fabs (operand1);
if (!isless (xa, 2**52))
return operand1;
- xa = xa + 2**52 - 2**52;
+ two52 = 2**52;
+ if (flag_rounding_math)
+ {
+ two52 = copysign (two52, operand1);
+ xa = operand1;
+ }
+ xa = xa + two52 - two52;
return copysign (xa, operand1);
*/
machine_mode mode = GET_MODE (operand0);
- rtx res, xa, TWO52, mask;
+ rtx res, xa, TWO52, two52, mask;
rtx_code_label *label;
res = gen_reg_rtx (mode);
@@ -45331,8 +45292,16 @@ ix86_expand_rint (rtx operand0, rtx operand1)
TWO52 = ix86_gen_TWO52 (mode);
label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
- xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
- xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
+ two52 = TWO52;
+ if (flag_rounding_math)
+ {
+ two52 = gen_reg_rtx (mode);
+ ix86_sse_copysign_to_positive (two52, TWO52, res, mask);
+ xa = res;
+ }
+
+ xa = expand_simple_binop (mode, PLUS, xa, two52, NULL_RTX, 0, OPTAB_DIRECT);
+ xa = expand_simple_binop (mode, MINUS, xa, two52, xa, 0, OPTAB_DIRECT);
ix86_sse_copysign_to_positive (res, xa, res, mask);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 9e5f4d857d9..f9b91286a01 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -508,6 +508,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI]
#define TARGET_ONE_IF_CONV_INSN \
ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]
+#define TARGET_EMIT_VZEROUPPER \
+ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -1121,6 +1123,9 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
|| (MODE) == V4TImode)
+#define VALID_AVX512F_REG_OR_XI_MODE(MODE) \
+ (VALID_AVX512F_REG_MODE (MODE) || (MODE) == XImode)
+
#define VALID_AVX512VL_128_REG_MODE(MODE) \
((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \
|| (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
@@ -2446,9 +2451,56 @@ enum avx_u128_state
#define FASTCALL_PREFIX '@'
+#ifndef USED_FOR_TARGET
+/* Structure describing stack frame layout.
+ Stack grows downward:
+
+ [arguments]
+ <- ARG_POINTER
+ saved pc
+
+ saved static chain if ix86_static_chain_on_stack
+
+ saved frame pointer if frame_pointer_needed
+ <- HARD_FRAME_POINTER
+ [saved regs]
+ <- regs_save_offset
+ [padding0]
+
+ [saved SSE regs]
+ <- sse_regs_save_offset
+ [padding1] |
+ | <- FRAME_POINTER
+ [va_arg registers] |
+ |
+ [frame] |
+ |
+ [padding2] | = to_allocate
+ <- STACK_POINTER
+ */
+struct GTY(()) ix86_frame
+{
+ int nsseregs;
+ int nregs;
+ int va_arg_size;
+ int red_zone_size;
+ int outgoing_arguments_size;
+
+ /* The offsets relative to ARG_POINTER. */
+ HOST_WIDE_INT frame_pointer_offset;
+ HOST_WIDE_INT hard_frame_pointer_offset;
+ HOST_WIDE_INT stack_pointer_offset;
+ HOST_WIDE_INT hfp_save_offset;
+ HOST_WIDE_INT reg_save_offset;
+ HOST_WIDE_INT sse_reg_save_offset;
+
+ /* When save_regs_using_mov is set, emit prologue using
+ move instead of push instructions. */
+ bool save_regs_using_mov;
+};
+
/* Machine specific frame tracking during prologue/epilogue generation. */
-#ifndef USED_FOR_TARGET
struct GTY(()) machine_frame_state
{
/* This pair tracks the currently active CFA as reg+offset. When reg
@@ -2507,6 +2559,9 @@ struct GTY(()) machine_function {
int varargs_fpr_size;
int optimize_mode_switching[MAX_386_ENTITIES];
+ /* Cached initial frame layout for the current function. */
+ struct ix86_frame frame;
+
/* Number of saved registers USE_FAST_PROLOGUE_EPILOGUE
has been computed for. */
int use_fast_prologue_epilogue_nregs;
@@ -2589,6 +2644,7 @@ struct GTY(()) machine_function {
#define ix86_current_function_calls_tls_descriptor \
(ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG))
#define ix86_static_chain_on_stack (cfun->machine->static_chain_on_stack)
+#define ix86_red_zone_size (cfun->machine->frame.red_zone_size)
/* Control behavior of x86_file_start. */
#define X86_FILE_START_VERSION_DIRECTIVE false
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 0281bb5f06c..dbe88f40c8f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9641,7 +9641,7 @@
(match_operand:SWI48 1 "nonimmediate_operand")
(subreg:QI
(and:SI
- (match_operand:SI 2 "register_operand")
+ (match_operand:SI 2 "register_operand" "c,r")
(match_operand:SI 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
@@ -9655,7 +9655,8 @@
(ashift:SWI48 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
- "operands[2] = gen_lowpart (QImode, operands[2]);")
+ "operands[2] = gen_lowpart (QImode, operands[2]);"
+ [(set_attr "isa" "*,bmi2")])
(define_insn "*bmi2_ashl<mode>3_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
@@ -10141,7 +10142,7 @@
(match_operand:SWI48 1 "nonimmediate_operand")
(subreg:QI
(and:SI
- (match_operand:SI 2 "register_operand")
+ (match_operand:SI 2 "register_operand" "c,r")
(match_operand:SI 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
@@ -10155,7 +10156,8 @@
(any_shiftrt:SWI48 (match_dup 1)
(match_dup 2)))
(clobber (reg:CC FLAGS_REG))])]
- "operands[2] = gen_lowpart (QImode, operands[2]);")
+ "operands[2] = gen_lowpart (QImode, operands[2]);"
+ [(set_attr "isa" "*,bmi2")])
(define_insn_and_split "*<shift_insn><mode>3_doubleword"
[(set (match_operand:DWI 0 "register_operand" "=&r")
@@ -10600,7 +10602,7 @@
(match_operand:SWI48 1 "nonimmediate_operand")
(subreg:QI
(and:SI
- (match_operand:SI 2 "register_operand")
+ (match_operand:SI 2 "register_operand" "c")
(match_operand:SI 3 "const_int_operand")) 0)))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a95f529d6a9..80cda39bacf 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -13852,7 +13852,7 @@
(match_operand:DI 1 "nonimmediate_operand"
" 0, 0,x ,Yv,r ,vm,?!*Yn,0,Yv,0,0,v")
(match_operand:DI 2 "vector_move_operand"
- "*rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
+ " rm,rm,rm,rm,C ,C ,C ,x,Yv,x,m,m")))]
"TARGET_SSE"
"@
pinsrq\t{$1, %2, %0|%0, %2, 1}
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index c642f452e00..3b2701bb5f2 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -551,3 +551,7 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
if-converted sequence to one. */
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
m_SILVERMONT | m_KNL | m_INTEL | m_CORE_ALL | m_GENERIC)
+
+/* X86_TUNE_EMIT_VZEROUPPER: This enables vzeroupper instruction insertion
+ before a transfer of control flow out of the function. */
+DEF_TUNE (X86_TUNE_EMIT_VZEROUPPER, "emit_vzeroupper", ~m_KNL)
diff --git a/gcc/config/i386/xm-mingw32.h b/gcc/config/i386/xm-mingw32.h
index 6eb3a626f0c..d4d63f58322 100644
--- a/gcc/config/i386/xm-mingw32.h
+++ b/gcc/config/i386/xm-mingw32.h
@@ -37,8 +37,3 @@ along with GCC; see the file COPYING3. If not see
"long long" values. Instead, we use "I64". */
#define HOST_LONG_LONG_FORMAT "I64"
#endif
-
-/* this is to prevent gcc-heap.c from assuming sizeof(long) == sizeof(intptr_t) */
-#ifdef __x86_64__
-# define HOST_BITS_PER_PTR 64
-#endif