aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/include/asm/fpu/types.h
blob: 0637826292deae64c7e6f0cbe10f4a6af2056cd5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
/*
 * FPU data structures:
 */
#ifndef _ASM_X86_FPU_H
#define _ASM_X86_FPU_H

/*
 * The legacy x87 FPU state format, as saved by FSAVE and
 * restored by the FRSTOR instructions:
 */
struct fregs_state {
	u32			cwd;	/* FPU Control Word		*/
	u32			swd;	/* FPU Status Word		*/
	u32			twd;	/* FPU Tag Word			*/
	u32			fip;	/* FPU IP Offset		*/
	u32			fcs;	/* FPU IP Selector		*/
	u32			foo;	/* FPU Operand Pointer Offset	*/
	u32			fos;	/* FPU Operand Pointer Selector	*/

	/* 8*10 bytes for each FP-reg = 80 bytes:			*/
	u32			st_space[20];

	/* Software status information [not touched by FSAVE]:		*/
	u32			status;
};

/*
 * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and
 * restored by the FXRSTOR instructions. It's similar to the FSAVE
 * format, but differs in some areas, plus has extensions at
 * the end for the XMM registers.
 */
struct fxregs_state {
	u16			cwd; /* Control Word			*/
	u16			swd; /* Status Word			*/
	u16			twd; /* Tag Word			*/
	u16			fop; /* Last Instruction Opcode		*/
	union {
		struct {
			u64	rip; /* Instruction Pointer		*/
			u64	rdp; /* Data Pointer			*/
		};
		struct {
			u32	fip; /* FPU IP Offset			*/
			u32	fcs; /* FPU IP Selector			*/
			u32	foo; /* FPU Operand Offset		*/
			u32	fos; /* FPU Operand Selector		*/
		};
	};
	u32			mxcsr;		/* MXCSR Register State */
	u32			mxcsr_mask;	/* MXCSR Mask		*/

	/* 8*16 bytes for each FP-reg = 128 bytes:			*/
	u32			st_space[32];

	/* 16*16 bytes for each XMM-reg = 256 bytes:			*/
	u32			xmm_space[64];

	u32			padding[12];

	union {
		u32		padding1[12];
		u32		sw_reserved[12];
	};

} __attribute__((aligned(16)));

/* Default value for fxregs_state.mxcsr: */
#define MXCSR_DEFAULT		0x1f80

/*
 * Software based FPU emulation state. This is arbitrary really,
 * it matches the x87 format to make it easier to understand:
 */
struct swregs_state {
	u32			cwd;
	u32			swd;
	u32			twd;
	u32			fip;
	u32			fcs;
	u32			foo;
	u32			fos;
	/* 8*10 bytes for each FP-reg = 80 bytes: */
	u32			st_space[20];
	u8			ftop;
	u8			changed;
	u8			lookahead;
	u8			no_update;
	u8			rm;
	u8			alimit;
	struct math_emu_info	*info;
	u32			entry_eip;
};

/*
 * List of XSAVE features Linux knows about:
 */
enum xfeature_bit {
	XSTATE_BIT_FP,
	XSTATE_BIT_SSE,
	XSTATE_BIT_YMM,
	XSTATE_BIT_BNDREGS,
	XSTATE_BIT_BNDCSR,
	XSTATE_BIT_OPMASK,
	XSTATE_BIT_ZMM_Hi256,
	XSTATE_BIT_Hi16_ZMM,

	XFEATURES_NR_MAX,
};

#define XSTATE_FP		(1 << XSTATE_BIT_FP)
#define XSTATE_SSE		(1 << XSTATE_BIT_SSE)
#define XSTATE_YMM		(1 << XSTATE_BIT_YMM)
#define XSTATE_BNDREGS		(1 << XSTATE_BIT_BNDREGS)
#define XSTATE_BNDCSR		(1 << XSTATE_BIT_BNDCSR)
#define XSTATE_OPMASK		(1 << XSTATE_BIT_OPMASK)
#define XSTATE_ZMM_Hi256	(1 << XSTATE_BIT_ZMM_Hi256)
#define XSTATE_Hi16_ZMM		(1 << XSTATE_BIT_Hi16_ZMM)

#define XSTATE_FPSSE		(XSTATE_FP | XSTATE_SSE)
#define XSTATE_AVX512		(XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)

/*
 * There are 16x 256-bit AVX registers named YMM0-YMM15.
 * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
 * and are stored in 'struct fxregs_state::xmm_space[]'.
 *
 * The high 128 bits are stored here:
 *    16x 128 bits == 256 bytes.
 */
struct ymmh_struct {
	u8				ymmh_space[256];
};

/* We don't support LWP yet: */
struct lwp_struct {
	u8				reserved[128];
};

/* Intel MPX support: */
struct bndreg {
	u64				lower_bound;
	u64				upper_bound;
} __packed;

struct bndcsr {
	u64				bndcfgu;
	u64				bndstatus;
} __packed;

struct mpx_struct {
	struct bndreg			bndreg[4];
	struct bndcsr			bndcsr;
};

struct xstate_header {
	u64				xfeatures;
	u64				xcomp_bv;
	u64				reserved[6];
} __attribute__((packed));

/* New processor state extensions should be added here: */
#define XSTATE_RESERVE			(sizeof(struct ymmh_struct) + \
					 sizeof(struct lwp_struct)  + \
					 sizeof(struct mpx_struct)  )
/*
 * This is our most modern FPU state format, as saved by the XSAVE
 * and restored by the XRSTOR instructions.
 *
 * It consists of a legacy fxregs portion, an xstate header and
 * subsequent fixed size areas as defined by the xstate header.
 * Not all CPUs support all the extensions.
 */
struct xregs_state {
	struct fxregs_state		i387;
	struct xstate_header		header;
	u8				__reserved[XSTATE_RESERVE];
} __attribute__ ((packed, aligned (64)));

/*
 * This is a union of all the possible FPU state formats
 * put together, so that we can pick the right one runtime.
 *
 * The size of the structure is determined by the largest
 * member - which is the xsave area:
 */
union fpregs_state {
	struct fregs_state		fsave;
	struct fxregs_state		fxsave;
	struct swregs_state		soft;
	struct xregs_state		xsave;
};

/*
 * Highest level per task FPU state data structure that
 * contains the FPU register state plus various FPU
 * state fields:
 */
struct fpu {
	/*
	 * @state:
	 *
	 * In-memory copy of all FPU registers that we save/restore
	 * over context switches. If the task is using the FPU then
	 * the registers in the FPU are more recent than this state
	 * copy. If the task context-switches away then they get
	 * saved here and represent the FPU state.
	 *
	 * After context switches there may be a (short) time period
	 * during which the in-FPU hardware registers are unchanged
	 * and still perfectly match this state, if the tasks
	 * scheduled afterwards are not using the FPU.
	 *
	 * This is the 'lazy restore' window of optimization, which
	 * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
	 *
	 * We detect whether a subsequent task uses the FPU via setting
	 * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
	 *
	 * During this window, if the task gets scheduled again, we
	 * might be able to skip having to do a restore from this
	 * memory buffer to the hardware registers - at the cost of
	 * incurring the overhead of #NM fault traps.
	 *
	 * Note that on modern CPUs that support the XSAVEOPT (or other
	 * optimized XSAVE instructions), we don't use #NM traps anymore,
	 * as the hardware can track whether FPU registers need saving
	 * or not. On such CPUs we activate the non-lazy ('eagerfpu')
	 * logic, which unconditionally saves/restores all FPU state
	 * across context switches. (if FPU state exists.)
	 */
	union fpregs_state		state;

	/*
	 * @last_cpu:
	 *
	 * Records the last CPU on which this context was loaded into
	 * FPU registers. (In the lazy-restore case we might be
	 * able to reuse FPU registers across multiple context switches
	 * this way, if no intermediate task used the FPU.)
	 *
	 * A value of -1 is used to indicate that the FPU state in context
	 * memory is newer than the FPU state in registers, and that the
	 * FPU state should be reloaded next time the task is run.
	 */
	unsigned int			last_cpu;

	/*
	 * @fpstate_active:
	 *
	 * This flag indicates whether this context is active: if the task
	 * is not running then we can restore from this context, if the task
	 * is running then we should save into this context.
	 */
	unsigned char			fpstate_active;

	/*
	 * @fpregs_active:
	 *
	 * This flag determines whether a given context is actively
	 * loaded into the FPU's registers and that those registers
	 * represent the task's current FPU state.
	 *
	 * Note the interaction with fpstate_active:
	 *
	 *   # task does not use the FPU:
	 *   fpstate_active == 0
	 *
	 *   # task uses the FPU and regs are active:
	 *   fpstate_active == 1 && fpregs_active == 1
	 *
	 *   # the regs are inactive but still match fpstate:
	 *   fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
	 *
	 * The third state is what we use for the lazy restore optimization
	 * on lazy-switching CPUs.
	 */
	unsigned char			fpregs_active;

	/*
	 * @counter:
	 *
	 * This counter contains the number of consecutive context switches
	 * during which the FPU stays used. If this is over a threshold, the
	 * lazy FPU restore logic becomes eager, to save the trap overhead.
	 * This is an unsigned char so that after 256 iterations the counter
	 * wraps and the context switch behavior turns lazy again; this is to
	 * deal with bursty apps that only use the FPU for a short time:
	 */
	unsigned char			counter;
};

#endif /* _ASM_X86_FPU_H */