/* arch/alpha/include/asm/fpu.h */

#ifndef __ASM_ALPHA_FPU_H
#define __ASM_ALPHA_FPU_H

#include <asm/special_insns.h>

/*
 * Alpha floating-point control register (fpcr) bit definitions.
 *
 * All constants are spelled with an explicit UL suffix so that each
 * one is an unsigned long and can be shifted/masked against an fpcr
 * value without any signed/unsigned surprises.
 */
#define FPCR_DNOD	(1UL << 47)	/* denorm INV trap disable */
#define FPCR_DNZ	(1UL << 48)	/* denorms to zero */
#define FPCR_INVD	(1UL << 49)	/* invalid op disable (opt.) */
#define FPCR_DZED	(1UL << 50)	/* division by zero disable (opt.) */
#define FPCR_OVFD	(1UL << 51)	/* overflow disable (optional) */
#define FPCR_INV	(1UL << 52)	/* invalid operation */
#define FPCR_DZE	(1UL << 53)	/* division by zero */
#define FPCR_OVF	(1UL << 54)	/* overflow */
#define FPCR_UNF	(1UL << 55)	/* underflow */
#define FPCR_INE	(1UL << 56)	/* inexact */
#define FPCR_IOV	(1UL << 57)	/* integer overflow */
#define FPCR_UNDZ	(1UL << 60)	/* underflow to zero (opt.) */
#define FPCR_UNFD	(1UL << 61)	/* underflow disable (opt.) */
#define FPCR_INED	(1UL << 62)	/* inexact disable (opt.) */
#define FPCR_SUM	(1UL << 63)	/* summary bit */

/* Dynamic rounding mode: a two-bit field starting at FPCR_DYN_SHIFT. */
#define FPCR_DYN_SHIFT	58		/* first dynamic rounding mode bit */
#define FPCR_DYN_CHOPPED (0x0UL << FPCR_DYN_SHIFT)	/* towards 0 */
#define FPCR_DYN_MINUS	 (0x1UL << FPCR_DYN_SHIFT)	/* towards -INF */
#define FPCR_DYN_NORMAL	 (0x2UL << FPCR_DYN_SHIFT)	/* towards nearest */
#define FPCR_DYN_PLUS	 (0x3UL << FPCR_DYN_SHIFT)	/* towards +INF */
#define FPCR_DYN_MASK	 (0x3UL << FPCR_DYN_SHIFT)

/* Bits 47..63, i.e. the span covered by the definitions above. */
#define FPCR_MASK	0xffff800000000000UL

/*
 * Software IEEE control word.
 *
 * The IEEE trap enables are implemented in software and kept per
 * thread in the "ieee_state" field of "struct thread_info".  Their
 * positions are therefore chosen so that they do not collide with the
 * (architected) floating-point enable bit.  The layout is also kept
 * compatible with OSF/1, so that ieee_set_fp_control() and friends
 * can be implemented easily and compatibly; the OSF/1 counterparts
 * are found in /usr/include/machine/fpu.h on that system.
 */

/* Trap enables: bits 1..6 */
#define IEEE_TRAP_ENABLE_INV	(1UL << 1)	/* invalid op */
#define IEEE_TRAP_ENABLE_DZE	(1UL << 2)	/* division by zero */
#define IEEE_TRAP_ENABLE_OVF	(1UL << 3)	/* overflow */
#define IEEE_TRAP_ENABLE_UNF	(1UL << 4)	/* underflow */
#define IEEE_TRAP_ENABLE_INE	(1UL << 5)	/* inexact */
#define IEEE_TRAP_ENABLE_DNO	(1UL << 6)	/* denorm */
#define IEEE_TRAP_ENABLE_MASK				\
	(IEEE_TRAP_ENABLE_INV | IEEE_TRAP_ENABLE_DZE |	\
	 IEEE_TRAP_ENABLE_OVF | IEEE_TRAP_ENABLE_UNF |	\
	 IEEE_TRAP_ENABLE_INE | IEEE_TRAP_ENABLE_DNO)

/* Denorm and underflow flushing: bits 12..13 */
#define IEEE_MAP_DMZ		(1UL << 12)	/* Map denorm inputs to zero */
#define IEEE_MAP_UMZ		(1UL << 13)	/* Map underflowed outputs to zero */
#define IEEE_MAP_MASK		(IEEE_MAP_DMZ | IEEE_MAP_UMZ)

/* Status bits coming from fpcr: bits 17..22 */
#define IEEE_STATUS_INV		(1UL << 17)
#define IEEE_STATUS_DZE		(1UL << 18)
#define IEEE_STATUS_OVF		(1UL << 19)
#define IEEE_STATUS_UNF		(1UL << 20)
#define IEEE_STATUS_INE		(1UL << 21)
#define IEEE_STATUS_DNO		(1UL << 22)
#define IEEE_STATUS_MASK				\
	(IEEE_STATUS_INV | IEEE_STATUS_DZE |		\
	 IEEE_STATUS_OVF | IEEE_STATUS_UNF |		\
	 IEEE_STATUS_INE | IEEE_STATUS_DNO)

/* Union of all the software-maintained groups above. */
#define IEEE_SW_MASK					\
	(IEEE_TRAP_ENABLE_MASK | IEEE_STATUS_MASK | IEEE_MAP_MASK)

/* Two-bit current rounding mode field. */
#define IEEE_CURRENT_RM_SHIFT	32
#define IEEE_CURRENT_RM_MASK	(3UL << IEEE_CURRENT_RM_SHIFT)

#define IEEE_STATUS_TO_EXCSUM_SHIFT	16

#define IEEE_INHERIT		(1UL << 63)	/* inherit on thread create? */

/*
 * Convert the software IEEE trap enable and status bits into the
 * hardware fpcr format. 
 *
 * Digital Unix engineers receive my thanks for not defining the
 * software bits identical to the hardware bits.  The chip designers
 * receive my thanks for making all the not-implemented fpcr bits
 * RAZ forcing us to use system calls to read/write this value.
 */

/*
 * Build a hardware-format fpcr value from a software control word.
 *
 *   - status bits (sw 17..22) move up by 35 to fpcr bits 52..57, and
 *     FPCR_SUM is set when any of them is set;
 *   - IEEE_MAP_DMZ (sw 12) moves up by 36;
 *   - IEEE_MAP_UMZ selects both FPCR_UNDZ and FPCR_UNFD;
 *   - the trap *enable* bits are inverted on the way, because the
 *     fpcr holds trap *disable* bits: INV/DZE/OVF move up by 48,
 *     UNF/INE by 57 and DNO by 41.
 *
 * NOTE(review): with the shift of 35, IEEE_STATUS_DNO ends up on the
 * bit this header calls FPCR_IOV - confirm that is intended.
 */
static inline unsigned long
ieee_swcr_to_fpcr(unsigned long sw)
{
	const unsigned long status = sw & IEEE_STATUS_MASK;
	const unsigned long not_enabled = ~sw;	/* set bit == trap not enabled */
	unsigned long fpcr = status << 35;

	if (status)
		fpcr |= FPCR_SUM;

	/* Denorm / underflow flushing controls. */
	fpcr |= (sw & IEEE_MAP_DMZ) << 36;
	if (sw & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;

	/* Enable bits (software) become disable bits (hardware). */
	fpcr |= (not_enabled & (IEEE_TRAP_ENABLE_INV |
				IEEE_TRAP_ENABLE_DZE |
				IEEE_TRAP_ENABLE_OVF)) << 48;
	fpcr |= (not_enabled & (IEEE_TRAP_ENABLE_UNF |
				IEEE_TRAP_ENABLE_INE)) << 57;
	fpcr |= (not_enabled & IEEE_TRAP_ENABLE_DNO) << 41;

	return fpcr;
}

/*
 * Build a software control word from a hardware-format fpcr value.
 *
 * Each field is shifted down by the same amount ieee_swcr_to_fpcr()
 * shifts it up, and the fpcr trap-disable bits are inverted to give
 * the software trap-enable bits.  IEEE_MAP_UMZ is taken from the
 * fpcr bit 47 positions above it (the FPCR_UNDZ position).  FPCR_SUM
 * has no software counterpart and is not carried over.
 */
static inline unsigned long
ieee_fpcr_to_swcr(unsigned long fp)
{
	const unsigned long not_disabled = ~fp;	/* set bit == trap not disabled */
	unsigned long swcr = 0;

	/* Disable bits (hardware) become enable bits (software). */
	swcr |= (not_disabled >> 41) & IEEE_TRAP_ENABLE_DNO;
	swcr |= (not_disabled >> 48) & (IEEE_TRAP_ENABLE_INV |
					IEEE_TRAP_ENABLE_DZE |
					IEEE_TRAP_ENABLE_OVF);
	swcr |= (not_disabled >> 57) & (IEEE_TRAP_ENABLE_UNF |
					IEEE_TRAP_ENABLE_INE);

	/* Denorm / underflow flushing controls. */
	swcr |= (fp >> 36) & IEEE_MAP_DMZ;
	swcr |= (fp >> 47) & IEEE_MAP_UMZ;

	/* Accrued exception status. */
	swcr |= (fp >> 35) & IEEE_STATUS_MASK;

	return swcr;
}

#ifdef __KERNEL__

/* The following two functions don't need trapb/excb instructions
   around the mf_fpcr/mt_fpcr instructions because (a) the kernel
   never generates arithmetic faults and (b) call_pal instructions
   are implied trap barriers.  */

/*
 * Read the floating-point control register and return its raw value.
 *
 * mf_fpcr can only deliver the fpcr into a floating-point register,
 * so $f0 is borrowed as scratch: its current contents are parked in
 * "tmp" first and put back by the last instruction, leaving $f0 as
 * the caller had it.
 */
static inline unsigned long
rdfpcr(void)
{
	unsigned long tmp, ret;

#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
	/*
	 * Register-to-register variant: ftoit/itoft move the 64-bit
	 * pattern directly between $f0 and integer registers.
	 * NOTE(review): presumably gated on EV6/EV67 because only those
	 * configurations guarantee ftoit/itoft - confirm.
	 */
	__asm__ __volatile__ (
		"ftoit $f0,%0\n\t"
		"mf_fpcr $f0\n\t"
		"ftoit $f0,%1\n\t"
		"itoft %0,$f0"
		: "=r"(tmp), "=r"(ret));
#else
	/*
	 * Generic variant: same sequence, but $f0 is saved, read out and
	 * restored through memory operands with stt/ldt.
	 */
	__asm__ __volatile__ (
		"stt $f0,%0\n\t"
		"mf_fpcr $f0\n\t"
		"stt $f0,%1\n\t"
		"ldt $f0,%0"
		: "=m"(tmp), "=m"(ret));
#endif

	return ret;
}

/*
 * Write "val" to the floating-point control register.
 *
 * mt_fpcr takes its operand from a floating-point register, so $f0
 * is borrowed as scratch: its current contents are parked in "tmp",
 * "val" is loaded into $f0 and written to the fpcr, and finally the
 * original $f0 is put back.
 */
static inline void
wrfpcr(unsigned long val)
{
	unsigned long tmp;

#if defined(CONFIG_ALPHA_EV6) || defined(CONFIG_ALPHA_EV67)
	/*
	 * Register-to-register variant using ftoit/itoft.  "tmp" is an
	 * early-clobber output ("=&r") because it is written before the
	 * input "val" is consumed, so the two must not share a register.
	 */
	__asm__ __volatile__ (
		"ftoit $f0,%0\n\t"
		"itoft %1,$f0\n\t"
		"mt_fpcr $f0\n\t"
		"itoft %0,$f0"
		: "=&r"(tmp) : "r"(val));
#else
	/* Generic variant: save, load and restore $f0 through memory. */
	__asm__ __volatile__ (
		"stt $f0,%0\n\t"
		"ldt $f0,%1\n\t"
		"mt_fpcr $f0\n\t"
		"ldt $f0,%0"
		: "=m"(tmp) : "m"(val));
#endif
}

/*
 * Refresh the status field of a software control word from an fpcr
 * value.
 *
 * EV6 implements most of the bits in hardware, so on that
 * implementation the accrued exception bits are collected from the
 * real fpcr: the old IEEE_STATUS_MASK bits of "swcr" are replaced by
 * the fpcr bits shifted down by 35.  On any other implementation
 * "swcr" is returned untouched.
 */
static inline unsigned long
swcr_update_status(unsigned long swcr, unsigned long fpcr)
{
	if (implver() != IMPLVER_EV6)
		return swcr;

	return (swcr & ~IEEE_STATUS_MASK) | ((fpcr >> 35) & IEEE_STATUS_MASK);
}

/*
 * Accessors for an individual floating-point register selected by
 * "reg"; the definitions live outside this header.
 * NOTE(review): the _s variants presumably operate on the
 * single-precision (S-format) view of the register - confirm against
 * the implementation.
 */
unsigned long alpha_read_fp_reg(unsigned long reg);
void alpha_write_fp_reg(unsigned long reg, unsigned long val);
unsigned long alpha_read_fp_reg_s(unsigned long reg);
void alpha_write_fp_reg_s(unsigned long reg, unsigned long val);

#endif /* __KERNEL__ */

#endif /* __ASM_ALPHA_FPU_H */