#if (AO_GNUC_PREREQ(4, 8) || AO_CLANG_PREREQ(3, 5)) \
&& !defined(AO_DISABLE_GCC_ATOMICS) /* Probably, it could be enabled even for earlier gcc/clang versions. */ # define AO_GCC_ATOMIC_TEST_AND_SET #endif
#ifdefined(__thumb__) && !defined(__thumb2__) /* Thumb One mode does not have ARM "mcr", "swp" and some load/store */ /* instructions, so we temporarily switch to ARM mode and go back */ /* afterwards (clobbering "r3" register). */ # define AO_THUMB_GO_ARM \ " adr r3, 4f\n" \ " bx r3\n" \ " .align\n" \ " .arm\n" \
AO_BR_ALIGN \ "4:\n" # define AO_THUMB_RESTORE_MODE \ " adr r3, 5f + 1\n" \ " bx r3\n" \ " .thumb\n" \
AO_BR_ALIGN \ "5:\n" # define AO_THUMB_SWITCH_CLOBBERS "r3", #else # define AO_THUMB_GO_ARM /* empty */ # define AO_THUMB_RESTORE_MODE /* empty */ # define AO_THUMB_SWITCH_CLOBBERS /* empty */ #endif/* !__thumb__ */
/* NEC LE-IT: gcc has no way to easily check the arm architecture */ /* but it defines only one (or several) of __ARM_ARCH_x__ to be true. */ #if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_3__) \
&& !defined(__ARM_ARCH_3M__) && !defined(__ARM_ARCH_4__) \
&& !defined(__ARM_ARCH_4T__) \
&& ((!defined(__ARM_ARCH_5__) && !defined(__ARM_ARCH_5E__) \
&& !defined(__ARM_ARCH_5T__) && !defined(__ARM_ARCH_5TE__) \
&& !defined(__ARM_ARCH_5TEJ__) && !defined(__ARM_ARCH_6M__)) \
|| defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
|| defined(__ARM_ARCH_8A__)) # define AO_ARM_HAVE_LDREX # if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
&& !defined(__ARM_ARCH_6T2__) /* LDREXB/STREXB and LDREXH/STREXH are present in ARMv6K/Z+. */ # define AO_ARM_HAVE_LDREXBH # endif # if !defined(__ARM_ARCH_6__) && !defined(__ARM_ARCH_6J__) \
&& !defined(__ARM_ARCH_6T2__) && !defined(__ARM_ARCH_6Z__) \
&& !defined(__ARM_ARCH_6ZT2__) # if !defined(__ARM_ARCH_6K__) && !defined(__ARM_ARCH_6KZ__) \
&& !defined(__ARM_ARCH_6ZK__) /* DMB is present in ARMv6M and ARMv7+. */ # define AO_ARM_HAVE_DMB # endif # if (!defined(__thumb__) \
|| (defined(__thumb2__) && !defined(__ARM_ARCH_7__) \
&& !defined(__ARM_ARCH_7M__) && !defined(__ARM_ARCH_7EM__))) \
&& (!defined(__clang__) || AO_CLANG_PREREQ(3, 3)) /* LDREXD/STREXD present in ARMv6K/M+ (see gas/config/tc-arm.c). */ /* In the Thumb mode, this works only starting from ARMv7 (except */ /* for the base and 'M' models). Clang3.2 (and earlier) does not */ /* allocate register pairs for LDREXD/STREXD properly (besides, */ /* Clang3.1 does not support "%H<r>" operand specification). */ # define AO_ARM_HAVE_LDREXD # endif /* !thumb || ARMv7A || ARMv7R+ */ # endif /* ARMv7+ */ #endif/* ARMv6+ */
#if !defined(__ARM_ARCH_2__) && !defined(__ARM_ARCH_6M__) \
&& !defined(__ARM_ARCH_8A__) && !defined(__thumb2__) # define AO_ARM_HAVE_SWP /* Note: ARMv6M is excluded due to no ARM mode support. */ /* Also, SWP is obsoleted for ARMv8+. */ #endif/* !__thumb2__ */
#if !defined(AO_UNIPROCESSOR) && defined(AO_ARM_HAVE_DMB) \
&& !defined(AO_PREFER_BUILTIN_ATOMICS)
AO_INLINE void
AO_nop_write(void)
{ /* AO_THUMB_GO_ARM is empty. */ /* This will target the system domain and thus be overly */ /* conservative as the CPUs (even in case of big.LITTLE SoC) will */ /* occupy the inner shareable domain. */ /* The plain variant (dmb st) is theoretically slower, and should */ /* not be needed. That said, with limited experimentation, a CPU */ /* implementation for which it actually matters has not been found */ /* yet, though they should already exist. */ /* Anyway, note that the "st" and "ishst" barriers are actually */ /* quite weak and, as the libatomic_ops documentation states, */ /* usually not what you really want. */
__asm__ __volatile__("dmb ishst" : : : "memory");
} # define AO_HAVE_nop_write #endif/* AO_ARM_HAVE_DMB */
#ifndef AO_GCC_ATOMIC_TEST_AND_SET
#ifdef AO_UNIPROCESSOR /* If only a single processor (core) is used, AO_UNIPROCESSOR could */ /* be defined by the client to avoid unnecessary memory barrier. */
AO_INLINE void
AO_nop_full(void)
{
AO_compiler_barrier();
} # define AO_HAVE_nop_full
#elifdefined(AO_ARM_HAVE_DMB) /* ARMv7 is compatible to ARMv6 but has a simpler command for issuing */ /* a memory barrier (DMB). Raising it via CP15 should still work */ /* (but slightly less efficient because it requires the use of */ /* a general-purpose register). */
AO_INLINE void
AO_nop_full(void)
{ /* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("dmb" : : : "memory");
} # define AO_HAVE_nop_full
#elifdefined(AO_ARM_HAVE_LDREX) /* ARMv6 is the first architecture providing support for a simple */ /* LL/SC. A data memory barrier must be raised via CP15 command. */
AO_INLINE void
AO_nop_full(void)
{ unsigned dest = 0;
/* Issue a data memory barrier (keeps ordering of memory */ /* transactions before and after this operation). */
__asm__ __volatile__("@AO_nop_full\n"
AO_THUMB_GO_ARM " mcr p15,0,%0,c7,c10,5\n"
AO_THUMB_RESTORE_MODE
: "=&r"(dest)
: /* empty */
: AO_THUMB_SWITCH_CLOBBERS "memory");
} # define AO_HAVE_nop_full
#else /* AO_nop_full() is emulated using AO_test_and_set_full(). */ #endif/* !AO_UNIPROCESSOR && !AO_ARM_HAVE_LDREX */
#endif/* !AO_GCC_ATOMIC_TEST_AND_SET */
#ifdef AO_ARM_HAVE_LDREX
/* "ARM Architecture Reference Manual" (chapter A3.5.3) says that the */ /* single-copy atomic processor accesses are all byte accesses, all */ /* halfword accesses to halfword-aligned locations, all word accesses */ /* to word-aligned locations. */ /* There is only a single concern related to AO store operations: */ /* a direct write (by STR[B/H] instruction) will not be recognized */ /* by the LL/SC construct on the same CPU (i.e., according to ARM */ /* documentation, e.g., see CortexA8 TRM reference, point 8.5, */ /* atomic "store" (using LDREX/STREX[B/H]) is the only safe way to */ /* set variables also used in LL/SC environment). */ /* This is only a problem if interrupt handlers do not clear the */ /* reservation (by CLREX instruction or a dummy STREX one), as they */ /* almost certainly should (e.g., see restore_user_regs defined in */ /* arch/arm/kernel/entry-header.S of Linux. Nonetheless, there is */ /* a doubt this was properly implemented in some ancient OS releases. */ # ifdef AO_BROKEN_TASKSWITCH_CLREX
# elif !defined(AO_GCC_ATOMIC_TEST_AND_SET) # include "../loadstore/atomic_store.h" /* AO_int_store is defined in ao_t_is_int.h. */ # endif /* !AO_BROKEN_TASKSWITCH_CLREX */
#endif/* AO_ARM_HAVE_LDREX */
#ifndef AO_GCC_ATOMIC_TEST_AND_SET
# include "../test_and_set_t_is_ao_t.h"/* Probably suboptimal */
#ifdef AO_ARM_HAVE_LDREX
/* AO_t/char/short/int load is simple reading. */ /* Unaligned accesses are not guaranteed to be atomic. */ # define AO_ACCESS_CHECK_ALIGNED # define AO_ACCESS_short_CHECK_ALIGNED # define AO_ACCESS_int_CHECK_ALIGNED # include "../all_atomic_only_load.h"
# ifndef AO_HAVE_char_store # include "../loadstore/char_atomic_store.h" # include "../loadstore/short_atomic_store.h" # endif
/* NEC LE-IT: replace the SWAP as recommended by ARM: "Appliesto:ARM11Cores ThoughtheSWPinstructionwillstillworkwithARMV6cores,itis recommendedtousethenewV6synchronizationinstructions.TheSWP instructionproduces'locked'readandwriteaccesseswhichareatomic, i.e.anotheroperationcannotbedonebetweentheselockedaccesseswhich tiesupexternalbus(AHB,AXI)bandwidthandcanincreaseworstcase interruptlatencies.LDREX,STREXaremoreflexible,otherinstructions canbedonebetweentheLDREXandSTREXaccesses."
*/ #ifndef AO_PREFER_GENERALIZED #if !defined(AO_FORCE_USE_SWP) || !defined(AO_ARM_HAVE_SWP) /* But, on the other hand, there could be a considerable performance */ /* degradation in case of a race. Eg., test_atomic.c executing */ /* test_and_set test on a dual-core ARMv7 processor using LDREX/STREX */ /* showed around 35 times lower performance than that using SWP. */ /* To force use of SWP instruction, use -D AO_FORCE_USE_SWP option */ /* (the latter is ignored if SWP instruction is unsupported). */
AO_INLINE AO_TS_VAL_t
AO_test_and_set(volatile AO_TS_t *addr)
{
AO_TS_VAL_t oldval; int flag;
__asm__ __volatile__("@AO_fetch_compare_and_swap\n"
AO_THUMB_GO_ARM
AO_BR_ALIGN "1: mov %0, #2\n"/* store a flag */
AO_MASK_PTR("%3") " ldrex %1, [%3]\n"/* get original */ " teq %1, %4\n"/* see if match */
AO_MASK_PTR("%3") # ifdef __thumb2__ " it eq\n" # endif " strexeq %0, %5, [%3]\n"/* store new one if matched */ " teq %0, #1\n" " beq 1b\n"/* if update failed, repeat */
AO_THUMB_RESTORE_MODE
: "=&r"(flag), "=&r"(fetched_val), "+m"(*addr)
: "r"(addr), "r"(old_val), "r"(new_val)
: AO_THUMB_SWITCH_CLOBBERS "cc"); return fetched_val;
} #define AO_HAVE_fetch_compare_and_swap
#ifdef AO_ARM_HAVE_LDREXD # include "../standard_ao_double_t.h"
/* "ARM Architecture Reference Manual ARMv7-A/R edition" (chapter */ /* A3.5.3) says that memory accesses caused by LDREXD and STREXD */ /* instructions to doubleword-aligned locations are single-copy */ /* atomic; accesses to 64-bit elements by other instructions might */ /* not be single-copy atomic as they are executed as a sequence of */ /* 32-bit accesses. */
AO_INLINE AO_double_t
AO_double_load(constvolatile AO_double_t *addr)
{
AO_double_t result;
AO_INLINE int
AO_double_compare_and_swap(volatile AO_double_t *addr,
AO_double_t old_val, AO_double_t new_val)
{
double_ptr_storage tmp; int result = 1;
do { /* AO_THUMB_GO_ARM is empty. */
__asm__ __volatile__("@AO_double_compare_and_swap\n"
AO_MASK_PTR("%1") " ldrexd %0, %H0, [%1]\n"/* get original to r1 & r2 */
: "=&r"(tmp)
: "r"(addr) /* : no clobber */); if (tmp != old_val.AO_whole) break;
__asm__ __volatile__(
AO_MASK_PTR("%2") " strexd %0, %3, %H3, [%2]\n"/* store new one if matched */
: "=&r"(result), "+m"(*addr)
: "r" (addr), "r" (new_val.AO_whole)
: "cc");
} while (AO_EXPECT_FALSE(result)); return !result; /* if succeeded then return 1 else 0 */
} # define AO_HAVE_double_compare_and_swap #endif/* AO_ARM_HAVE_LDREXD */
#else /* pre ARMv6 architectures ... */
/* I found a slide set that, if I read it correctly, claims that */ /* Loads followed by either a Load or Store are ordered, but nothing */ /* else is. */ /* It appears that SWP is the only simple memory barrier. */ #include"../all_aligned_atomic_load_store.h"
/* The code should run correctly on a multi-core ARMv6+ as well. */
#endif/* !AO_ARM_HAVE_LDREX */
#if !defined(AO_HAVE_test_and_set_full) && !defined(AO_HAVE_test_and_set) \
&& defined (AO_ARM_HAVE_SWP) && (!defined(AO_PREFER_GENERALIZED) \
|| !defined(AO_HAVE_fetch_compare_and_swap))
AO_INLINE AO_TS_VAL_t
AO_test_and_set_full(volatile AO_TS_t *addr)
{
AO_TS_VAL_t oldval; /* SWP on ARM is very similar to XCHG on x86. */ /* The first operand is the result, the second the value */ /* to be stored. Both registers must be different from addr. */ /* Make the address operand an early clobber output so it */ /* doesn't overlap with the other operands. The early clobber */ /* on oldval is necessary to prevent the compiler allocating */ /* them to the same register if they are both unused. */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.