#ifndef _ASM_X86_BARRIER_H #define _ASM_X86_BARRIER_H #include #include /* * Force strict CPU ordering. * And yes, this is required on UP too when we're talking * to devices. */ #ifdef CONFIG_X86_32 /* * Some non-Intel clones support out of order store. wmb() ceases to be a * nop for these. */ #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) #define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) #else #define mb() asm volatile("mfence":::"memory") #define rmb() asm volatile("lfence":::"memory") #define wmb() asm volatile("sfence" ::: "memory") #endif /** * read_barrier_depends - Flush all pending reads that subsequents reads * depend on. * * No data-dependent reads from memory-like regions are ever reordered * over this barrier. All reads preceding this primitive are guaranteed * to access memory (but not necessarily other CPUs' caches) before any * reads following this primitive that depend on the data return by * any of the preceding reads. This primitive is much lighter weight than * rmb() on most CPUs, and is never heavier weight than is * rmb(). * * These ordering constraints are respected by both the local CPU * and the compiler. * * Ordering is not guaranteed by anything other than these primitives, * not even by data dependencies. See the documentation for * memory_barrier() for examples and URLs to more information. * * For example, the following code would force ordering (the initial * value of "a" is zero, "b" is one, and "p" is "&a"): * * * CPU 0 CPU 1 * * b = 2; * memory_barrier(); * p = &b; q = p; * read_barrier_depends(); * d = *q; * * * because the read of "*q" depends on the read of "p" and these * two reads are separated by a read_barrier_depends(). However, * the following code, with the same initial values for "a" and "b": * * * CPU 0 CPU 1 * * a = 2; * memory_barrier(); * b = 3; y = b; * read_barrier_depends(); * x = a; * * * does not enforce ordering, since there is no data dependency between * the read of "a" and the read of "b". Therefore, on some CPUs, such * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb() * in cases like this where there are no data dependencies. **/ #define read_barrier_depends() do { } while (0) #ifdef CONFIG_SMP #define smp_mb() mb() #ifdef CONFIG_X86_PPRO_FENCE # define smp_rmb() rmb() #else # define smp_rmb() barrier() #endif #define smp_wmb() barrier() #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) #else /* !SMP */ #define smp_mb() barrier() #define smp_rmb() barrier() #define smp_wmb() barrier() #define smp_read_barrier_depends() do { } while (0) #define set_mb(var, value) do { var = value; barrier(); } while (0) #endif /* SMP */ #if defined(CONFIG_X86_PPRO_FENCE) /* * For either of these options x86 doesn't have a strong TSO memory * model and we should fall back to full barriers. */ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ACCESS_ONCE(*p) = (v); \ } while (0) #define smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ smp_mb(); \ ___p1; \ }) #else /* regular x86 TSO memory ordering */ #define smp_store_release(p, v) \ do { \ compiletime_assert_atomic_type(*p); \ barrier(); \ ACCESS_ONCE(*p) = (v); \ } while (0) #define smp_load_acquire(p) \ ({ \ typeof(*p) ___p1 = ACCESS_ONCE(*p); \ compiletime_assert_atomic_type(*p); \ barrier(); \ ___p1; \ }) #endif /* * Stop RDTSC speculation. This is needed when you need to use RDTSC * (or get_cycles or vread that possibly accesses the TSC) in a defined * code region. * * (Could use an alternative three way for this if there was one.) */ static __always_inline void rdtsc_barrier(void) { alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); } #endif /* _ASM_X86_BARRIER_H */