OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /openrisc/tags/gnu-dev/fsf-gcc-snapshot-1-mar-12/or1k-gcc/libgcc/config/tilepro
    from Rev 734 to Rev 783

Rev 734 → Rev 783

/t-tilepro
0,0 → 1,33
LIB2ADD += \
  $(srcdir)/config/tilepro/softmpy.S \
  $(srcdir)/config/tilepro/atomic.c

LIB2FUNCS_EXCLUDE += \
  _divdi3 \
  _moddi3 \
  _muldi3 \
  _udivdi3 \
  _umoddi3

SOFTDIVIDE_FUNCS := \
  _tile_udivsi3 \
  _tile_divsi3 \
  _tile_udivdi3 \
  _tile_divdi3 \
  _tile_umodsi3 \
  _tile_modsi3 \
  _tile_umoddi3 \
  _tile_moddi3

softdivide-o = $(patsubst %,%$(objext),$(SOFTDIVIDE_FUNCS))
$(softdivide-o): %$(objext): $(srcdir)/config/tilepro/softdivide.c
	$(gcc_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $< \
	  $(vis_hide)
libgcc-objects += $(softdivide-o)

ifeq ($(enable_shared),yes)
softdivide-s-o = $(patsubst %,%_s$(objext),$(SOFTDIVIDE_FUNCS))
$(softdivide-s-o): %_s$(objext): $(srcdir)/config/tilepro/softdivide.c
	$(gcc_s_compile) -ffunction-sections -DMAYBE_STATIC= -DL$* -c $<
libgcc-s-objects += $(softdivide-s-o)
endif
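
Each object listed in SOFTDIVIDE_FUNCS is built from the single softdivide.c source: the -DL$* flag defines one L_tile_* macro per compilation, and softdivide.c only emits the function whose #ifdef guard matches, so each object holds exactly one routine and the linker pulls in only the ones a program uses. A minimal sketch of that guard pattern (the names here are made up for illustration, not the real libgcc ones):

/* guarded.c: compiled once per function, e.g. with -DL_example_div or
   -DL_example_mod; each resulting object contains a single routine.  */

#ifdef L_example_div
int
example_div (int a, int b)
{
  return a / b;   /* emitted only when -DL_example_div is given */
}
#endif

#ifdef L_example_mod
int
example_mod (int a, int b)
{
  return a % b;   /* emitted only when -DL_example_mod is given */
}
#endif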
/sfp-machine.h
0,0 → 1,56
#define _FP_W_TYPE_SIZE 32
#define _FP_W_TYPE unsigned long
#define _FP_WS_TYPE signed long
#define _FP_I_TYPE long
 
/* The type of the result of a floating point comparison. This must
match `__libgcc_cmp_return__' in GCC for the target. */
typedef int __gcc_CMPtype __attribute__ ((mode (__libgcc_cmp_return__)));
#define CMPtype __gcc_CMPtype
 
#define _FP_MUL_MEAT_S(R,X,Y) \
_FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_D(R,X,Y) \
_FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm)
#define _FP_MUL_MEAT_Q(R,X,Y) \
_FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q,R,X,Y,umul_ppmm)
 
#define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_loop(S,R,X,Y)
#define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y)
#define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_4_udiv(Q,R,X,Y)
 
#define _FP_NANFRAC_S _FP_QNANBIT_S
#define _FP_NANFRAC_D _FP_QNANBIT_D, 0
#define _FP_NANFRAC_Q _FP_QNANBIT_Q, 0, 0, 0
#define _FP_NANSIGN_S 1
#define _FP_NANSIGN_D 1
#define _FP_NANSIGN_Q 1
 
#define _FP_KEEPNANFRACP 1
 
#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \
do { \
if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs) \
&& !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) \
{ \
R##_s = Y##_s; \
_FP_FRAC_COPY_##wc(R,Y); \
} \
else \
{ \
R##_s = X##_s; \
_FP_FRAC_COPY_##wc(R,X); \
} \
R##_c = FP_CLS_NAN; \
} while (0)
 
#define __LITTLE_ENDIAN 1234
#define __BIG_ENDIAN 4321
 
#define __BYTE_ORDER __LITTLE_ENDIAN
 
/* Define ALIASNAME as a strong alias for NAME. */
# define strong_alias(name, aliasname) _strong_alias(name, aliasname)
# define _strong_alias(name, aliasname) \
extern __typeof (name) aliasname __attribute__ ((alias (#name)));
 
/t-crtstuff
0,0 → 1,4
# crtend*.o cannot be compiled without -fno-asynchronous-unwind-tables,
# because then __FRAME_END__ might not be the last thing in .eh_frame
# section.
CRTSTUFF_T_CFLAGS += -fno-asynchronous-unwind-tables
/softdivide.c
0,0 → 1,354
/* Division and remainder routines for Tile.
Copyright (C) 2011, 2012
Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
 
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
 
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
typedef int int32_t;
typedef unsigned uint32_t;
typedef long long int64_t;
typedef unsigned long long uint64_t;
 
/* Raise signal 8 (SIGFPE) with code 1 (FPE_INTDIV). */
static inline void
raise_intdiv (void)
{
asm ("{ raise; moveli zero, 8 + (1 << 6) }");
}
 
 
#ifndef __tilegx__
/*__udivsi3 - 32 bit integer unsigned divide */
static inline uint32_t __attribute__ ((always_inline))
__udivsi3_inline (uint32_t dividend, uint32_t divisor)
{
/* Divide out any power of two factor from dividend and divisor.
Note that when dividing by zero the divisor will remain zero,
which is all we need to detect that case below. */
const int power_of_two_factor = __insn_ctz (divisor);
divisor >>= power_of_two_factor;
dividend >>= power_of_two_factor;
 
/* Checks for division by power of two or division by zero. */
if (divisor <= 1)
{
if (divisor == 0)
{
raise_intdiv ();
return 0;
}
return dividend;
}
 
/* Compute (a / b) by repeatedly finding the largest N
such that (b << N) <= a. For each such N, set bit N in the
quotient, subtract (b << N) from a, and keep going. Think of this as
the reverse of the "shift-and-add" that a multiply does. The values
of N are precisely those shift counts.
 
Finding N is easy. First, use clz(b) - clz(a) to find the N
that lines up the high bit of (b << N) with the high bit of a.
Any larger value of N would definitely make (b << N) > a,
which is too big.
 
Then, if (b << N) > a (because it has larger low bits), decrement
N by one. This adjustment will definitely make (b << N) less
than a, because a's high bit is now one higher than b's. */
 
/* Precomputing the max_ values allows us to avoid a subtract
in the inner loop and just right shift by clz(remainder). */
const int divisor_clz = __insn_clz (divisor);
const uint32_t max_divisor = divisor << divisor_clz;
const uint32_t max_qbit = 1 << divisor_clz;
 
uint32_t quotient = 0;
uint32_t remainder = dividend;
 
while (remainder >= divisor)
{
int shift = __insn_clz (remainder);
uint32_t scaled_divisor = max_divisor >> shift;
uint32_t quotient_bit = max_qbit >> shift;
 
int too_big = (scaled_divisor > remainder);
scaled_divisor >>= too_big;
quotient_bit >>= too_big;
remainder -= scaled_divisor;
quotient |= quotient_bit;
}
return quotient;
}
#endif /* !__tilegx__ */
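
The comment above describes a shift-based restoring division; the same loop can be sketched in portable C with __builtin_clz standing in for the __insn_clz intrinsic. This is a host-testable illustration only (soft_udiv32 is a hypothetical name; the zero-divisor trap and the ctz fast path of the real routine are omitted):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint32_t
soft_udiv32 (uint32_t dividend, uint32_t divisor)
{
  assert (divisor != 0);
  const int divisor_clz = __builtin_clz (divisor);
  const uint32_t max_divisor = divisor << divisor_clz;  /* high bit set */
  const uint32_t max_qbit = 1u << divisor_clz;

  uint32_t quotient = 0, remainder = dividend;
  while (remainder >= divisor)
    {
      int shift = __builtin_clz (remainder);
      uint32_t scaled_divisor = max_divisor >> shift;  /* divisor << N */
      uint32_t quotient_bit = max_qbit >> shift;       /* 1 << N */
      int too_big = (scaled_divisor > remainder);      /* adjust N down by one */
      scaled_divisor >>= too_big;
      quotient_bit >>= too_big;
      remainder -= scaled_divisor;
      quotient |= quotient_bit;
    }
  return quotient;
}

int
main (void)
{
  /* 100 / 7 subtracts 7<<3, 7<<2 and 7<<1, setting quotient bits 3, 2
     and 1: 0b1110 = 14, with remainder 2.  */
  printf ("%u\n", soft_udiv32 (100, 7));  /* prints 14 */
  return 0;
}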
 
 
/* __udivdi3 - 64 bit integer unsigned divide */
static inline uint64_t __attribute__ ((always_inline))
__udivdi3_inline (uint64_t dividend, uint64_t divisor)
{
/* Divide out any power of two factor from dividend and divisor.
Note that when dividing by zero the divisor will remain zero,
which is all we need to detect that case below. */
const int power_of_two_factor = __builtin_ctzll (divisor);
divisor >>= power_of_two_factor;
dividend >>= power_of_two_factor;
 
/* Checks for division by power of two or division by zero. */
if (divisor <= 1)
{
if (divisor == 0)
{
raise_intdiv ();
return 0;
}
return dividend;
}
 
#ifndef __tilegx__
if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
{
/* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
return __udivsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
}
#endif /* !__tilegx__ */
 
/* See algorithm description in __udivsi3 */
 
const int divisor_clz = __builtin_clzll (divisor);
const uint64_t max_divisor = divisor << divisor_clz;
const uint64_t max_qbit = 1ULL << divisor_clz;
 
uint64_t quotient = 0;
uint64_t remainder = dividend;
 
while (remainder >= divisor)
{
int shift = __builtin_clzll (remainder);
uint64_t scaled_divisor = max_divisor >> shift;
uint64_t quotient_bit = max_qbit >> shift;
 
int too_big = (scaled_divisor > remainder);
scaled_divisor >>= too_big;
quotient_bit >>= too_big;
remainder -= scaled_divisor;
quotient |= quotient_bit;
}
return quotient;
}
 
 
#ifndef __tilegx__
/* __umodsi3 - 32 bit integer unsigned modulo */
static inline uint32_t __attribute__ ((always_inline))
__umodsi3_inline (uint32_t dividend, uint32_t divisor)
{
/* Shortcircuit mod by a power of two (and catch mod by zero). */
const uint32_t mask = divisor - 1;
if ((divisor & mask) == 0)
{
if (divisor == 0)
{
raise_intdiv ();
return 0;
}
return dividend & mask;
}
 
/* We compute the remainder (a % b) by repeatedly subtracting off
multiples of b from a until a < b. The key is that subtracting
off a multiple of b does not affect the result mod b.
 
To make the algorithm run efficiently, we need to subtract
off a large multiple of b at each step. We subtract the largest
(b << N) that is <= a.
 
Finding N is easy. First, use clz(b) - clz(a) to find the N
that lines up the high bit of (b << N) with the high bit of a.
Any larger value of N would definitely make (b << N) > a,
which is too big.
 
Then, if (b << N) > a (because it has larger low bits), decrement
N by one. This adjustment will definitely make (b << N) less
than a, because a's high bit is now one higher than b's. */
const uint32_t max_divisor = divisor << __insn_clz (divisor);
 
uint32_t remainder = dividend;
while (remainder >= divisor)
{
const int shift = __insn_clz (remainder);
uint32_t scaled_divisor = max_divisor >> shift;
scaled_divisor >>= (scaled_divisor > remainder);
remainder -= scaled_divisor;
}
 
return remainder;
}
#endif /* !__tilegx__ */
 
 
/* __umoddi3 - 64 bit integer unsigned modulo */
static inline uint64_t __attribute__ ((always_inline))
__umoddi3_inline (uint64_t dividend, uint64_t divisor)
{
#ifndef __tilegx__
if (((uint32_t) (dividend >> 32) | ((uint32_t) (divisor >> 32))) == 0)
{
/* Operands both fit in 32 bits, so use faster 32 bit algorithm. */
return __umodsi3_inline ((uint32_t) dividend, (uint32_t) divisor);
}
#endif /* !__tilegx__ */
 
/* Shortcircuit mod by a power of two (and catch mod by zero). */
const uint64_t mask = divisor - 1;
if ((divisor & mask) == 0)
{
if (divisor == 0)
{
raise_intdiv ();
return 0;
}
return dividend & mask;
}
 
/* See algorithm description in __umodsi3 */
const uint64_t max_divisor = divisor << __builtin_clzll (divisor);
 
uint64_t remainder = dividend;
while (remainder >= divisor)
{
const int shift = __builtin_clzll (remainder);
uint64_t scaled_divisor = max_divisor >> shift;
scaled_divisor >>= (scaled_divisor > remainder);
remainder -= scaled_divisor;
}
 
return remainder;
}
 
 
uint32_t __udivsi3 (uint32_t dividend, uint32_t divisor);
#ifdef L_tile_udivsi3
uint32_t
__udivsi3 (uint32_t dividend, uint32_t divisor)
{
#ifndef __tilegx__
return __udivsi3_inline (dividend, divisor);
#else /* !__tilegx__ */
uint64_t n = __udivdi3_inline (((uint64_t) dividend), ((uint64_t) divisor));
return (uint32_t) n;
#endif /* !__tilegx__ */
}
#endif
 
#define ABS(x) ((x) >= 0 ? (x) : -(x))
 
int32_t __divsi3 (int32_t dividend, int32_t divisor);
#ifdef L_tile_divsi3
/* __divsi3 - 32 bit integer signed divide */
int32_t
__divsi3 (int32_t dividend, int32_t divisor)
{
#ifndef __tilegx__
uint32_t n = __udivsi3_inline (ABS (dividend), ABS (divisor));
#else /* !__tilegx__ */
uint64_t n =
__udivdi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
#endif /* !__tilegx__ */
if ((dividend ^ divisor) < 0)
n = -n;
return (int32_t) n;
}
#endif
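
__divsi3 above (and the signed modulo routines below) reduce to the unsigned helpers and then patch up the sign: the quotient is negated when the operand signs differ, and the remainder takes the sign of the dividend. A small host-side check of those rules against C's truncating / and % operators (check_sign_rules is a hypothetical helper; INT32_MIN, whose negation overflows, is deliberately not exercised):

#include <assert.h>
#include <stdint.h>

static void
check_sign_rules (int32_t a, int32_t b)
{
  uint32_t q = (uint32_t) (a < 0 ? -a : a) / (uint32_t) (b < 0 ? -b : b);
  uint32_t r = (uint32_t) (a < 0 ? -a : a) % (uint32_t) (b < 0 ? -b : b);
  int32_t quot = (int32_t) (((a ^ b) < 0) ? -q : q);  /* negate if signs differ */
  int32_t rem = (int32_t) ((a >= 0) ? r : -r);        /* sign of the dividend */
  assert (quot == a / b && rem == a % b);
}

int
main (void)
{
  check_sign_rules (-7, 2);   /* -7 / 2 == -3,  -7 % 2 == -1 */
  check_sign_rules (7, -2);   /*  7 / -2 == -3,  7 % -2 == 1 */
  check_sign_rules (-7, -2);  /* -7 / -2 == 3,  -7 % -2 == -1 */
  return 0;
}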
 
 
uint64_t __udivdi3 (uint64_t dividend, uint64_t divisor);
#ifdef L_tile_udivdi3
uint64_t
__udivdi3 (uint64_t dividend, uint64_t divisor)
{
return __udivdi3_inline (dividend, divisor);
}
#endif
 
/*__divdi3 - 64 bit integer signed divide */
int64_t __divdi3 (int64_t dividend, int64_t divisor);
#ifdef L_tile_divdi3
int64_t
__divdi3 (int64_t dividend, int64_t divisor)
{
uint64_t n = __udivdi3_inline (ABS (dividend), ABS (divisor));
if ((dividend ^ divisor) < 0)
n = -n;
return (int64_t) n;
}
#endif
 
 
uint32_t __umodsi3 (uint32_t dividend, uint32_t divisor);
#ifdef L_tile_umodsi3
uint32_t
__umodsi3 (uint32_t dividend, uint32_t divisor)
{
#ifndef __tilegx__
return __umodsi3_inline (dividend, divisor);
#else /* !__tilegx__ */
return __umoddi3_inline ((uint64_t) dividend, (uint64_t) divisor);
#endif /* !__tilegx__ */
}
#endif
 
 
/* __modsi3 - 32 bit integer signed modulo */
int32_t __modsi3 (int32_t dividend, int32_t divisor);
#ifdef L_tile_modsi3
int32_t
__modsi3 (int32_t dividend, int32_t divisor)
{
#ifndef __tilegx__
uint32_t remainder = __umodsi3_inline (ABS (dividend), ABS (divisor));
#else /* !__tilegx__ */
uint64_t remainder =
__umoddi3_inline (ABS ((int64_t) dividend), ABS ((int64_t) divisor));
#endif /* !__tilegx__ */
return (int32_t) ((dividend >= 0) ? remainder : -remainder);
}
#endif
 
 
uint64_t __umoddi3 (uint64_t dividend, uint64_t divisor);
#ifdef L_tile_umoddi3
uint64_t
__umoddi3 (uint64_t dividend, uint64_t divisor)
{
return __umoddi3_inline (dividend, divisor);
}
#endif
 
 
/* __moddi3 - 64 bit integer signed modulo */
int64_t __moddi3 (int64_t dividend, int64_t divisor);
#ifdef L_tile_moddi3
int64_t
__moddi3 (int64_t dividend, int64_t divisor)
{
uint64_t remainder = __umoddi3_inline (ABS (dividend), ABS (divisor));
return (int64_t) ((dividend >= 0) ? remainder : -remainder);
}
#endif
/softmpy.S
0,0 → 1,95
/* 64-bit multiplication support for TILEPro.
Copyright (C) 2011, 2012
Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
 
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
 
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
/* 64-bit multiplication support. */
 
.file "softmpy.S"
 
/* Parameters */
#define lo0 r9 /* low 32 bits of n0 */
#define hi0 r1 /* high 32 bits of n0 */
#define lo1 r2 /* low 32 bits of n1 */
#define hi1 r3 /* high 32 bits of n1 */
 
/* temps */
#define result1_a r4
#define result1_b r5
 
#define tmp0 r6
#define tmp0_left_16 r7
#define tmp1 r8
 
.section .text.__muldi3, "ax"
.align 8
.globl __muldi3
.type __muldi3, @function
__muldi3:
{
move lo0, r0 /* so we can write "out r0" while "in r0" alive */
mulhl_uu tmp0, lo1, r0
}
{
mulll_uu result1_a, lo1, hi0
}
{
move tmp1, tmp0
mulhla_uu tmp0, lo0, lo1
}
{
mulhlsa_uu result1_a, lo1, hi0
}
{
mulll_uu result1_b, lo0, hi1
slt_u tmp1, tmp0, tmp1
}
{
mulhlsa_uu result1_a, lo0, hi1
shli r0, tmp0, 16
}
{
move tmp0_left_16, r0
mulhha_uu result1_b, lo0, lo1
}
{
mullla_uu r0, lo1, lo0
shli tmp1, tmp1, 16
}
{
mulhlsa_uu result1_b, hi0, lo1
inthh tmp1, tmp1, tmp0
}
{
mulhlsa_uu result1_a, hi1, lo0
slt_u tmp0, r0, tmp0_left_16
}
/* NOTE: this will stall for a cycle here. Oh well. */
{
add r1, tmp0, tmp1
add result1_a, result1_a, result1_b
}
{
add r1, r1, result1_a
jrp lr
}
.size __muldi3,.-__muldi3
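
The bundles above assemble a 64x64->64 multiply from 32x32 partial products: only lo0*lo1 contributes to the low word, while the two cross products and the carry out of the low-word accumulation land in the high word (hi0*hi1 would only affect bits 64 and up, so it is never computed). A rough functional model in C, not a rendering of the bundle schedule (muldi3_model is a hypothetical name):

#include <assert.h>
#include <stdint.h>

static uint64_t
muldi3_model (uint64_t n0, uint64_t n1)
{
  uint32_t lo0 = (uint32_t) n0, hi0 = (uint32_t) (n0 >> 32);
  uint32_t lo1 = (uint32_t) n1, hi1 = (uint32_t) (n1 >> 32);

  uint64_t low = (uint64_t) lo0 * lo1;             /* full 64-bit product */
  uint32_t cross = lo0 * hi1 + hi0 * lo1;          /* needed only mod 2^32 */
  uint32_t high = (uint32_t) (low >> 32) + cross;  /* carry from the low word */

  return ((uint64_t) high << 32) | (uint32_t) low;
}

int
main (void)
{
  assert (muldi3_model (0x0123456789abcdefULL, 0xfedcba9876543210ULL)
          == 0x0123456789abcdefULL * 0xfedcba9876543210ULL);
  return 0;
}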
/atomic.h
0,0 → 1,428
/* Macros for atomic functionality for tile.
Copyright (C) 2011, 2012
Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
 
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
 
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
 
/* Provides macros for common atomic functionality. */
 
#ifndef _ATOMIC_H_
#define _ATOMIC_H_
 
#ifdef __tilegx__
/* Atomic instruction macros
 
The macros provided by atomic.h simplify access to the TILE-Gx
architecture's atomic instructions. The architecture provides a
variety of atomic instructions, including "exchange", "compare and
exchange", "fetch and ADD", "fetch and AND", "fetch and OR", and
"fetch and ADD if greater than or equal to zero".
 
No barrier or fence semantics are implied by any of the atomic
instructions for manipulating memory; you must specify the barriers
that you wish explicitly, using the provided macros.
 
Any integral 32- or 64-bit value can be used as the argument
to these macros, such as "int", "long long", "unsigned long", etc.
The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data.
The "exchange" and "compare and exchange" macros may also take
pointer values. We use the pseudo-type "VAL" in the documentation
to indicate the use of an appropriate type. */
#else
/* Atomic instruction macros
 
The macros provided by atomic.h simplify access to the Tile
architecture's atomic instructions. Since the architecture
supports test-and-set as its only in-silicon atomic operation, many
of the operations provided by this header are implemented as
fast-path calls to Linux emulation routines.
 
Using the kernel for atomic operations allows userspace to take
advantage of the kernel's existing atomic-integer support (managed
by a distributed array of locks). The kernel provides proper
ordering among simultaneous atomic operations on different cores,
and guarantees a process can not be context-switched part way
through an atomic operation. By virtue of sharing the kernel
atomic implementation, the userspace atomic operations
are compatible with the atomic methods provided by the kernel's
futex() syscall API. Note that these operations never cause Linux
kernel scheduling, and are in fact invisible to the kernel; they
simply act as regular function calls but with an elevated privilege
level. Note that the kernel's distributed lock array is hashed by
using only VA bits from the atomic value's address (to avoid the
performance hit of page table locking and multiple page-table
lookups to get the PA) and only the VA bits that are below page
granularity (to properly lock simultaneous accesses to the same
page mapped at different VAs). As a result, simultaneous atomic
operations on values whose addresses are at the same offset on a
page will contend in the kernel for the same lock array element.
 
No barrier or fence semantics are implied by any of the atomic
instructions for manipulating memory; you must specify the barriers
that you wish explicitly, using the provided macros.
 
Any integral 32- or 64-bit value can be used as the argument
to these macros, such as "int", "long long", "unsigned long", etc.
The pointers must be aligned to 4 or 8 bytes for 32- or 64-bit data.
The "exchange" and "compare and exchange" macros may also take
pointer values. We use the pseudo-type "VAL" in the documentation
to indicate the use of an appropriate type.
 
The 32-bit routines are implemented using a single kernel fast
syscall, as is the 64-bit compare-and-exchange. The other 64-bit
routines are implemented by looping over the 64-bit
compare-and-exchange routine, so may be potentially less efficient. */
#endif
 
#include <stdint.h>
#include <features.h>
#ifdef __tilegx__
#include <arch/spr_def.h>
#else
#include <asm/unistd.h>
#endif
 
 
/* 32-bit integer compare-and-exchange. */
static __inline __attribute__ ((always_inline))
int atomic_val_compare_and_exchange_4 (volatile int *mem,
int oldval, int newval)
{
#ifdef __tilegx__
__insn_mtspr (SPR_CMPEXCH_VALUE, oldval);
return __insn_cmpexch4 (mem, newval);
#else
int result;
__asm__ __volatile__ ("swint1":"=R00" (result),
"=m" (*mem):"R10" (__NR_FAST_cmpxchg), "R00" (mem),
"R01" (oldval), "R02" (newval), "m" (*mem):"r20",
"r21", "r22", "r23", "r24", "r25", "r26", "r27",
"r28", "r29", "memory");
return result;
#endif
}
 
/* 64-bit integer compare-and-exchange. */
static __inline __attribute__ ((always_inline))
int64_t atomic_val_compare_and_exchange_8 (volatile int64_t * mem,
int64_t oldval,
int64_t newval)
{
#ifdef __tilegx__
__insn_mtspr (SPR_CMPEXCH_VALUE, oldval);
return __insn_cmpexch (mem, newval);
#else
unsigned int result_lo, result_hi;
unsigned int oldval_lo = oldval & 0xffffffffu, oldval_hi = oldval >> 32;
unsigned int newval_lo = newval & 0xffffffffu, newval_hi = newval >> 32;
__asm__ __volatile__ ("swint1":"=R00" (result_lo), "=R01" (result_hi),
"=m" (*mem):"R10" (__NR_FAST_cmpxchg64), "R00" (mem),
"R02" (oldval_lo), "R03" (oldval_hi),
"R04" (newval_lo), "R05" (newval_hi),
"m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25",
"r26", "r27", "r28", "r29", "memory");
return ((uint64_t) result_hi) << 32 | result_lo;
#endif
}
 
/* This non-existent symbol is called for sizes other than "4" and "8",
indicating a bug in the caller. */
extern int __atomic_error_bad_argument_size (void)
__attribute__ ((warning ("sizeof atomic argument not 4 or 8")));
 
 
#define atomic_val_compare_and_exchange(mem, o, n) \
({ \
(__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \
((sizeof(*(mem)) == 8) ? \
atomic_val_compare_and_exchange_8( \
(volatile int64_t*)(mem), (__typeof((o)-(o)))(o), \
(__typeof((n)-(n)))(n)) : \
(sizeof(*(mem)) == 4) ? \
atomic_val_compare_and_exchange_4( \
(volatile int*)(mem), (__typeof((o)-(o)))(o), \
(__typeof((n)-(n)))(n)) : \
__atomic_error_bad_argument_size()); \
})
 
#define atomic_bool_compare_and_exchange(mem, o, n) \
({ \
__typeof(o) __o = (o); \
__builtin_expect( \
__o == atomic_val_compare_and_exchange((mem), __o, (n)), 1); \
})
 
 
/* Loop with compare_and_exchange until we guess the correct value.
Normally "expr" will be an expression using __old and __value. */
#define __atomic_update_cmpxchg(mem, value, expr) \
({ \
__typeof(value) __value = (value); \
__typeof(*(mem)) *__mem = (mem), __old = *__mem, __guess; \
do { \
__guess = __old; \
__old = atomic_val_compare_and_exchange(__mem, __old, (expr)); \
} while (__builtin_expect(__old != __guess, 0)); \
__old; \
})
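
Operations that neither the hardware nor the fast syscalls provide directly can be built from this loop; __old and __value are the names the macro binds for use in "expr". For instance, a hypothetical atomic maximum (illustrative only, not an API defined by this header; atomic.c builds atomic_xor and atomic_nand the same way):

/* Sketch: atomic max via the compare-and-exchange loop above.
   Like the other update macros, it returns the previous value.  */
#define atomic_max(mem, value) \
  __atomic_update_cmpxchg((mem), (value), \
                          (__old > __value ? __old : __value))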
 
#ifdef __tilegx__
 
/* Generic atomic op with 8- or 4-byte variant.
The _mask, _addend, and _expr arguments are ignored on tilegx. */
#define __atomic_update(mem, value, op, _mask, _addend, _expr) \
({ \
((__typeof(*(mem))) \
((sizeof(*(mem)) == 8) ? (__typeof(*(mem)-*(mem)))__insn_##op( \
(void *)(mem), (int64_t)(__typeof((value)-(value)))(value)) : \
(sizeof(*(mem)) == 4) ? (int)__insn_##op##4( \
(void *)(mem), (int32_t)(__typeof((value)-(value)))(value)) : \
__atomic_error_bad_argument_size())); \
})
 
#else
 
/* This uses TILEPro's fast syscall support to atomically compute:
 
int old = *ptr;
*ptr = (old & mask) + addend;
return old;
 
This primitive can be used for atomic exchange, add, or, and.
Only 32-bit support is provided. */
static __inline __attribute__ ((always_inline))
int
__atomic_update_4 (volatile int *mem, int mask, int addend)
{
int result;
__asm__ __volatile__ ("swint1":"=R00" (result),
"=m" (*mem):"R10" (__NR_FAST_atomic_update),
"R00" (mem), "R01" (mask), "R02" (addend),
"m" (*mem):"r20", "r21", "r22", "r23", "r24", "r25",
"r26", "r27", "r28", "r29", "memory");
return result;
}
 
/* Generic atomic op with 8- or 4-byte variant.
The _op argument is ignored on tilepro. */
#define __atomic_update(mem, value, _op, mask, addend, expr) \
({ \
(__typeof(*(mem)))(__typeof(*(mem)-*(mem))) \
((sizeof(*(mem)) == 8) ? \
__atomic_update_cmpxchg((mem), (value), (expr)) : \
(sizeof(*(mem)) == 4) ? \
__atomic_update_4((volatile int*)(mem), (__typeof((mask)-(mask)))(mask), \
(__typeof((addend)-(addend)))(addend)) : \
__atomic_error_bad_argument_size()); \
})
 
#endif /* __tilegx__ */
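
The TILEPro primitive above computes *p = (old & mask) + addend and returns old; the macros that follow instantiate it for each operation. A plain, non-atomic C model of that kernel operation, for reference only (atomic_update_model is a hypothetical name; the real update runs atomically inside the swint1 handler):

/* exchange(v): mask = 0,  addend = v  ->  *p = v
   add(v):      mask = -1, addend = v  ->  *p = old + v
   and(m):      mask = m,  addend = 0  ->  *p = old & m
   or(m):       mask = ~m, addend = m  ->  *p = (old & ~m) + m = old | m  */
static int
atomic_update_model (int *p, int mask, int addend)
{
  int old = *p;
  *p = (old & mask) + addend;
  return old;
}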
 
 
#define atomic_exchange(mem, newvalue) \
__atomic_update(mem, newvalue, exch, 0, newvalue, __value)
 
#define atomic_add(mem, value) \
__atomic_update(mem, value, fetchadd, -1, value, __old + __value)
 
#define atomic_sub(mem, value) atomic_add((mem), -(value))
 
#define atomic_increment(mem) atomic_add((mem), 1)
 
#define atomic_decrement(mem) atomic_add((mem), -1)
 
#define atomic_and(mem, mask) \
__atomic_update(mem, mask, fetchand, mask, 0, __old & __value)
 
#define atomic_or(mem, mask) \
__atomic_update(mem, mask, fetchor, ~mask, mask, __old | __value)
 
#define atomic_bit_set(mem, bit) \
({ \
__typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \
__mask & atomic_or((mem), __mask); \
})
 
#define atomic_bit_clear(mem, bit) \
({ \
__typeof(*(mem)) __mask = (__typeof(*(mem)))1 << (bit); \
__mask & atomic_and((mem), ~__mask); \
})
 
#ifdef __tilegx__
/* Atomically store a new value to memory.
Note that you can freely use types of any size here, unlike the
other atomic routines, which require 32- or 64-bit types.
This accessor is provided for compatibility with TILEPro, which
required an explicit atomic operation for stores that needed
to be atomic with respect to other atomic methods in this header. */
#define atomic_write(mem, value) ((void) (*(mem) = (value)))
#else
#define atomic_write(mem, value) \
do { \
__typeof(mem) __aw_mem = (mem); \
__typeof(value) __aw_val = (value); \
unsigned int *__aw_mem32, __aw_intval, __aw_val32, __aw_off, __aw_mask; \
__aw_intval = (__typeof((value) - (value)))__aw_val; \
switch (sizeof(*__aw_mem)) { \
case 8: \
__atomic_update_cmpxchg(__aw_mem, __aw_val, __value); \
break; \
case 4: \
__atomic_update_4((int *)__aw_mem, 0, __aw_intval); \
break; \
case 2: \
__aw_off = 8 * ((long)__aw_mem & 0x2); \
__aw_mask = 0xffffU << __aw_off; \
__aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x2); \
__aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
__atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
(__old & ~__aw_mask) | __value); \
break; \
case 1: \
__aw_off = 8 * ((long)__aw_mem & 0x3); \
__aw_mask = 0xffU << __aw_off; \
__aw_mem32 = (unsigned int *)((long)__aw_mem & ~0x3); \
__aw_val32 = (__aw_intval << __aw_off) & __aw_mask; \
__atomic_update_cmpxchg(__aw_mem32, __aw_val32, \
(__old & ~__aw_mask) | __value); \
break; \
} \
} while (0)
#endif
 
/* Compiler barrier.
 
This macro prevents loads or stores from being moved by the compiler
across the macro. Any loaded value that was loaded before this
macro must then be reloaded by the compiler. */
#define atomic_compiler_barrier() __asm__ __volatile__("" ::: "memory")
 
/* Full memory barrier.
 
This macro has the semantics of atomic_compiler_barrier(), but also
ensures that previous stores are visible to other cores, and that
all previous loaded values have been placed into their target
register on this core. */
#define atomic_full_barrier() __insn_mf()
 
/* Read memory barrier.
 
Ensure that all reads by this processor that occurred prior to the
read memory barrier have completed, and that no reads that occur
after the read memory barrier on this processor are initiated
before the barrier.
 
On current TILE chips a read barrier is implemented as a full barrier,
but this may not be true in later versions of the architecture.
 
See also atomic_acquire_barrier() for the appropriate idiom to use
to ensure no reads are lifted above an atomic lock instruction. */
#define atomic_read_barrier() atomic_full_barrier()
 
/* Write memory barrier.
 
Ensure that all writes by this processor that occurred prior to the
write memory barrier have completed, and that no writes that occur
after the write memory barrier on this processor are initiated
before the barrier.
 
On current TILE chips a write barrier is implemented as a full barrier,
but this may not be true in later versions of the architecture.
 
See also atomic_release_barrier() for the appropriate idiom to use
to ensure all writes are complete prior to an atomic unlock instruction. */
#define atomic_write_barrier() atomic_full_barrier()
 
/* Lock acquisition barrier.
 
Ensure that no load operations that follow this macro in the
program can issue prior to the barrier. Without such a barrier,
the compiler can reorder them to issue earlier, or the hardware can
issue them speculatively. The latter is not currently done in the
Tile microarchitecture, but using this operation improves
portability to future implementations.
 
This operation is intended to be used as part of the "acquire"
path for locking, that is, when entering a critical section.
This should be done after the atomic operation that actually
acquires the lock, and in conjunction with a "control dependency"
that checks the atomic operation result to see if the lock was
in fact acquired. See the atomic_read_barrier() macro
for a heavier-weight barrier to use in certain unusual constructs,
or atomic_acquire_barrier_value() if no control dependency exists. */
#define atomic_acquire_barrier() atomic_compiler_barrier()
 
/* Lock release barrier.
 
Ensure that no store operations that precede this macro in the
program complete subsequent to the barrier. Without such a
barrier, the compiler can reorder stores to issue later, or stores
can be still outstanding in the memory network.
 
This operation is intended to be used as part of the "release" path
for locking, that is, when leaving a critical section. This should
be done before the operation (such as a store of zero) that
actually releases the lock. */
#define atomic_release_barrier() atomic_write_barrier()
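
Putting several of the macros above together, a minimal spinlock sketch (illustrative only, not an API provided by this header; it follows the documented idiom of an acquire barrier after the lock-taking atomic and a release barrier before the unlocking store):

typedef int spinlock_t;  /* 0 = unlocked, 1 = locked */

static __inline void
spin_lock (volatile spinlock_t *lock)
{
  while (atomic_exchange (lock, 1) != 0)
    ;                           /* spin until we observe it unlocked */
  atomic_acquire_barrier ();    /* keep critical-section loads below */
}

static __inline void
spin_unlock (volatile spinlock_t *lock)
{
  atomic_release_barrier ();    /* drain critical-section stores first */
  atomic_write (lock, 0);       /* the store that actually releases the lock */
}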
 
/* Barrier until the read of a particular value is complete.
 
This is occasionally useful when constructing certain locking
scenarios. For example, you might write a routine that issues an
atomic instruction to enter a critical section, then reads one or
more values within the critical section without checking to see if
the critical section was in fact acquired, and only later checks
the atomic instruction result to see if the lock was acquired. If
so the routine could properly release the lock and know that the
values that were read were valid.
 
In this scenario, it is required to wait for the result of the
atomic instruction, even if the value itself is not checked. This
guarantees that if the atomic instruction succeeded in taking the lock,
the lock was held before any reads in the critical section issued. */
#define atomic_acquire_barrier_value(val) \
__asm__ __volatile__("move %0, %0" :: "r"(val))
 
/* Access the given variable in memory exactly once.
 
In some contexts, an algorithm may need to force access to memory,
since otherwise the compiler may think it can optimize away a
memory load or store; for example, in a loop when polling memory to
see if another cpu has updated it yet. Generally this is only
required for certain very carefully hand-tuned algorithms; using it
unnecessarily may result in performance losses.
 
A related use of this macro is to ensure that the compiler does not
rematerialize the value of "x" by reloading it from memory
unexpectedly; the "volatile" marking will prevent the compiler from
being able to rematerialize. This is helpful if an algorithm needs
to read a variable without locking, but needs it to have the same
value if it ends up being used several times within the algorithm.
 
Note that multiple uses of this macro are guaranteed to be ordered,
i.e. the compiler will not reorder stores or loads that are wrapped
in atomic_access_once(). */
#define atomic_access_once(x) (*(volatile __typeof(x) *)&(x))
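
A typical use, per the comment above, is polling a flag that another core will set after publishing data (an illustrative sketch; ready, data and wait_for_data are not part of this header):

extern int ready;  /* set nonzero by another core after 'data' is written */
extern int data;

static __inline int
wait_for_data (void)
{
  while (!atomic_access_once (ready))
    ;                        /* reloaded every iteration, never cached */
  atomic_read_barrier ();    /* don't satisfy the 'data' load early */
  return data;
}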
 
 
#endif /* !_ATOMIC_H_ */
/linux-unwind.h
0,0 → 1,100
/* DWARF2 EH unwinding support for TILEPro.
Copyright (C) 2011, 2012
Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
 
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#ifndef inhibit_libc
 
#include <arch/abi.h>
#include <signal.h>
#include <sys/ucontext.h>
#include <linux/unistd.h>
 
/* Macro to define a copy of the kernel's __rt_sigreturn function
(in arch/tile/kernel/entry.S). If that function is changed,
this one needs to be changed to match it. */
#define _sigreturn_asm(REG, NR) asm( \
".pushsection .text.__rt_sigreturn,\"a\"\n" \
".global __rt_sigreturn\n" \
".type __rt_sigreturn,@function\n" \
"__rt_sigreturn:\n" \
"moveli " #REG ", " #NR "\n" \
"swint1\n" \
".size __rt_sigreturn, . - __rt_sigreturn\n" \
".popsection")
#define sigreturn_asm(REG, NR) _sigreturn_asm(REG, NR)
sigreturn_asm (TREG_SYSCALL_NR_NAME, __NR_rt_sigreturn);
#define SIGRETURN_LEN 16
extern char __rt_sigreturn[];
 
#define MD_FALLBACK_FRAME_STATE_FOR tile_fallback_frame_state
 
static _Unwind_Reason_Code
tile_fallback_frame_state (struct _Unwind_Context *context,
_Unwind_FrameState *fs)
{
unsigned char *pc = context->ra;
struct sigcontext *sc;
long new_cfa;
int i;
 
struct rt_sigframe {
unsigned char save_area[C_ABI_SAVE_AREA_SIZE];
struct siginfo info;
struct ucontext uc;
} *rt_;
 
/* Return if this is not a signal handler. */
if (memcmp (pc, __rt_sigreturn, SIGRETURN_LEN) != 0)
return _URC_END_OF_STACK;
 
/* It was a signal handler; update the reported PC to point to our
copy, since that will be findable with dladdr() and therefore
somewhat easier to help understand what actually happened. */
context->ra = __rt_sigreturn;
 
rt_ = context->cfa;
sc = &rt_->uc.uc_mcontext;
 
new_cfa = sc->sp;
fs->regs.cfa_how = CFA_REG_OFFSET;
fs->regs.cfa_reg = STACK_POINTER_REGNUM;
fs->regs.cfa_offset = new_cfa - (long) context->cfa;
 
for (i = 0; i < 56; ++i)
{
fs->regs.reg[i].how = REG_SAVED_OFFSET;
fs->regs.reg[i].loc.offset
= (long)&sc->gregs[i] - new_cfa;
}
 
fs->regs.reg[56].how = REG_SAVED_OFFSET;
fs->regs.reg[56].loc.offset = (long)&sc->pc - new_cfa;
fs->retaddr_column = 56;
fs->signal_frame = 1;
 
return _URC_NO_REASON;
}
 
#endif /* ifndef inhibit_libc */
/atomic.c
0,0 → 1,232
/* TILE atomics.
Copyright (C) 2011, 2012
Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
 
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
 
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include "system.h"
#include "coretypes.h"
#include "atomic.h"
 
/* This code should be inlined by the compiler, but for now support
it as out-of-line methods in libgcc. */
 
static void
pre_atomic_barrier (int model)
{
switch ((enum memmodel) model)
{
case MEMMODEL_RELEASE:
case MEMMODEL_ACQ_REL:
case MEMMODEL_SEQ_CST:
__atomic_thread_fence (model);
break;
default:
break;
}
return;
}
 
static void
post_atomic_barrier (int model)
{
switch ((enum memmodel) model)
{
case MEMMODEL_ACQUIRE:
case MEMMODEL_ACQ_REL:
case MEMMODEL_SEQ_CST:
__atomic_thread_fence (model);
break;
default:
break;
}
return;
}
 
#define __unused __attribute__((unused))
 
/* Provide additional methods not implemented by atomic.h. */
#define atomic_xor(mem, mask) \
__atomic_update_cmpxchg(mem, mask, __old ^ __value)
#define atomic_nand(mem, mask) \
__atomic_update_cmpxchg(mem, mask, ~(__old & __value))
 
#define __atomic_fetch_and_do(type, size, opname) \
type \
__atomic_fetch_##opname##_##size(type* p, type i, int model) \
{ \
pre_atomic_barrier(model); \
type rv = atomic_##opname(p, i); \
post_atomic_barrier(model); \
return rv; \
}
 
__atomic_fetch_and_do (int, 4, add)
__atomic_fetch_and_do (int, 4, sub)
__atomic_fetch_and_do (int, 4, or)
__atomic_fetch_and_do (int, 4, and)
__atomic_fetch_and_do (int, 4, xor)
__atomic_fetch_and_do (int, 4, nand)
__atomic_fetch_and_do (long long, 8, add)
__atomic_fetch_and_do (long long, 8, sub)
__atomic_fetch_and_do (long long, 8, or)
__atomic_fetch_and_do (long long, 8, and)
__atomic_fetch_and_do (long long, 8, xor)
__atomic_fetch_and_do (long long, 8, nand)
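
These are the out-of-line entry points behind the compiler's __atomic builtins when no inline expansion is used; a hedged caller-side sketch (counter and bump_counter are illustrative, and the exact lowering is the compiler's decision):

#include <stdint.h>

int32_t counter;

int32_t
bump_counter (void)
{
  /* Expected to lower to __atomic_fetch_add_4 (&counter, 1,
     __ATOMIC_SEQ_CST), defined just above, which brackets the
     kernel-assisted add with a full barrier on each side.  */
  return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
}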
/* 'op2' is applied to the combined result, e.g. '~' for nand.  */
#define __atomic_do_and_fetch(type, size, opname, op, op2) \
type \
__atomic_##opname##_fetch_##size(type* p, type i, int model) \
{ \
pre_atomic_barrier(model); \
type rv = op2 (atomic_##opname(p, i) op i); \
post_atomic_barrier(model); \
return rv; \
}
__atomic_do_and_fetch (int, 4, add, +, )
__atomic_do_and_fetch (int, 4, sub, -, )
__atomic_do_and_fetch (int, 4, or, |, )
__atomic_do_and_fetch (int, 4, and, &, )
__atomic_do_and_fetch (int, 4, xor, ^, )
__atomic_do_and_fetch (int, 4, nand, &, ~)
__atomic_do_and_fetch (long long, 8, add, +, )
__atomic_do_and_fetch (long long, 8, sub, -, )
__atomic_do_and_fetch (long long, 8, or, |, )
__atomic_do_and_fetch (long long, 8, and, &, )
__atomic_do_and_fetch (long long, 8, xor, ^, )
__atomic_do_and_fetch (long long, 8, nand, &, ~)
#define __atomic_exchange_methods(type, size) \
bool \
__atomic_compare_exchange_##size(volatile type* ptr, type* oldvalp, \
type newval, bool weak __unused, \
int models, int modelf __unused) \
{ \
type oldval = *oldvalp; \
pre_atomic_barrier(models); \
type retval = atomic_val_compare_and_exchange(ptr, oldval, newval); \
post_atomic_barrier(models); \
bool success = (retval == oldval); \
*oldvalp = retval; \
return success; \
} \
\
type \
__atomic_exchange_##size(volatile type* ptr, type val, int model) \
{ \
pre_atomic_barrier(model); \
type retval = atomic_exchange(ptr, val); \
post_atomic_barrier(model); \
return retval; \
}
__atomic_exchange_methods (int, 4)
__atomic_exchange_methods (long long, 8)
 
/* Subword methods require the same approach for both TILEPro and
TILE-Gx. We load the background data for the word, insert the
desired subword piece, then compare-and-exchange it into place. */
#define u8 unsigned char
#define u16 unsigned short
#define __atomic_subword_cmpxchg(type, size) \
\
bool \
__atomic_compare_exchange_##size(volatile type* ptr, type* guess, \
type val, bool weak __unused, int models, \
int modelf __unused) \
{ \
pre_atomic_barrier(models); \
unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
const int shift = ((unsigned long)ptr & 3UL) * 8; \
const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
const unsigned int bgmask = ~(valmask << shift); \
unsigned int oldword = *p; \
type oldval = (oldword >> shift) & valmask; \
if (__builtin_expect((oldval == *guess), 1)) { \
unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
oldword = atomic_val_compare_and_exchange(p, oldword, word); \
oldval = (oldword >> shift) & valmask; \
} \
post_atomic_barrier(models); \
bool success = (oldval == *guess); \
*guess = oldval; \
return success; \
}
__atomic_subword_cmpxchg (u8, 1)
__atomic_subword_cmpxchg (u16, 2)
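
The containing-word arithmetic used by these subword methods, shown in isolation: find the aligned 32-bit word holding the byte, derive the shift and masks, and splice the new value in (a host-side illustration with no atomicity; splice_byte is a hypothetical name, and the real code compare-and-exchanges the resulting word into place):

#include <assert.h>
#include <stdint.h>

static uint32_t
splice_byte (const unsigned char *ptr, uint32_t oldword, uint8_t val)
{
  const int shift = ((unsigned long) ptr & 3UL) * 8;  /* bit offset in word */
  const uint32_t valmask = (1u << 8) - 1;
  const uint32_t bgmask = ~(valmask << shift);        /* keep the other bytes */
  return (oldword & bgmask) | ((uint32_t) val << shift);
}

int
main (void)
{
  uint32_t word = 0x44332211u;
  unsigned char *bytes = (unsigned char *) &word;
  /* &bytes[2] is 2 past a 4-byte-aligned address, so shift == 16.  */
  assert (splice_byte (&bytes[2], word, 0xaa) == 0x44aa2211u);
  return 0;
}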
/* For the atomic-update subword methods, we use the same approach as
above, but we retry until we succeed if the compare-and-exchange
fails. */
#define __atomic_subword(type, proto, top, expr, bottom) \
proto \
{ \
top \
unsigned int *p = (unsigned int *)((unsigned long)ptr & ~3UL); \
const int shift = ((unsigned long)ptr & 3UL) * 8; \
const unsigned int valmask = (1 << (sizeof(type) * 8)) - 1; \
const unsigned int bgmask = ~(valmask << shift); \
unsigned int oldword, xword = *p; \
type val, oldval; \
do { \
oldword = xword; \
oldval = (oldword >> shift) & valmask; \
val = expr; \
unsigned int word = (oldword & bgmask) | ((val & valmask) << shift); \
xword = atomic_val_compare_and_exchange(p, oldword, word); \
} while (__builtin_expect(xword != oldword, 0)); \
bottom \
}
#define __atomic_subword_fetch(type, funcname, expr, retval) \
__atomic_subword(type, \
type __atomic_ ## funcname(volatile type *ptr, type i, int model), \
pre_atomic_barrier(model);, \
expr, \
post_atomic_barrier(model); return retval;)
__atomic_subword_fetch (u8, fetch_add_1, oldval + i, oldval)
__atomic_subword_fetch (u8, fetch_sub_1, oldval - i, oldval)
__atomic_subword_fetch (u8, fetch_or_1, oldval | i, oldval)
__atomic_subword_fetch (u8, fetch_and_1, oldval & i, oldval)
__atomic_subword_fetch (u8, fetch_xor_1, oldval ^ i, oldval)
__atomic_subword_fetch (u8, fetch_nand_1, ~(oldval & i), oldval)
__atomic_subword_fetch (u16, fetch_add_2, oldval + i, oldval)
__atomic_subword_fetch (u16, fetch_sub_2, oldval - i, oldval)
__atomic_subword_fetch (u16, fetch_or_2, oldval | i, oldval)
__atomic_subword_fetch (u16, fetch_and_2, oldval & i, oldval)
__atomic_subword_fetch (u16, fetch_xor_2, oldval ^ i, oldval)
__atomic_subword_fetch (u16, fetch_nand_2, ~(oldval & i), oldval)
__atomic_subword_fetch (u8, add_fetch_1, oldval + i, val)
__atomic_subword_fetch (u8, sub_fetch_1, oldval - i, val)
__atomic_subword_fetch (u8, or_fetch_1, oldval | i, val)
__atomic_subword_fetch (u8, and_fetch_1, oldval & i, val)
__atomic_subword_fetch (u8, xor_fetch_1, oldval ^ i, val)
__atomic_subword_fetch (u8, nand_fetch_1, ~(oldval & i), val)
__atomic_subword_fetch (u16, add_fetch_2, oldval + i, val)
__atomic_subword_fetch (u16, sub_fetch_2, oldval - i, val)
__atomic_subword_fetch (u16, or_fetch_2, oldval | i, val)
__atomic_subword_fetch (u16, and_fetch_2, oldval & i, val)
__atomic_subword_fetch (u16, xor_fetch_2, oldval ^ i, val)
__atomic_subword_fetch (u16, nand_fetch_2, ~(oldval & i), val)
#define __atomic_subword_lock(type, size) \
\
__atomic_subword(type, \
type __atomic_exchange_##size(volatile type* ptr, type nval, int model), \
pre_atomic_barrier(model);, \
nval, \
post_atomic_barrier(model); return oldval;)
__atomic_subword_lock (u8, 1)
__atomic_subword_lock (u16, 2)
