OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

Compare Revisions

  • This comparison shows the changes necessary to convert path
    /openrisc/tags/gnu-dev/fsf-gcc-snapshot-1-mar-12/or1k-gcc/libgcc/config/spu
    from Rev 734 to Rev 783
    Reverse comparison

Rev 734 → Rev 783

/mfc_tag_release.c
0,0 → 1,59
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_mfcio.h>
extern vector unsigned int __mfc_tag_table;
 
/* Release the specified DMA tag from exclusive use. Once released, the
tag is available for future reservation. Upon successful release,
MFC_DMA_TAG_VALID is returned. If the specified tag is not in the
range 0 to 31, or had not been reserved, no action is taken and
MFC_DMA_TAG_INVALID is returned. */
 
/* Release TAG back to the free-tag table.

   Returns element 0 of the computed "invalid" mask: 0 when the tag was
   reserved and has now been released, all ones when TAG is greater than
   31 or was not reserved.  In the rejected case the table is left
   untouched.  */
unsigned int
__mfc_tag_release (unsigned int tag)
{
  /* One bit in the most significant position of each word.  Shifted
     right by TAG it lines up with that tag's slot in the table, where
     tag 0 occupies the most significant bit and a set bit means
     "free".  */
  vector unsigned int mask = (vector unsigned int)
    { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
  vector signed int zero = (vector signed int) { 0, 0, 0, 0 };
  vector unsigned int is_invalid;
  vector signed int has_been_reserved;

  /* Check if the tag is out of range.  */
  is_invalid = spu_cmpgt (spu_promote (tag, 0), 31);

  /* Rotate the tag's bit into the sign position and test it: the
     result is all ones if the tag has NOT been reserved (its bit is
     still set), 0 otherwise.  */
  has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag);
  has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved);

  /* Out of range or not reserved: either way the request is invalid.  */
  is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid);

  mask = spu_rlmask (mask, (int)(-tag));

  /* Drop the mask for rejected requests.  The shift above only honours
     the low bits of its count (modulo 64 per the SPU rotm definition),
     so without this an out-of-range tag such as 64 would produce the
     mask for tag 0 and free a tag the caller never owned.  */
  mask = spu_andc (mask, is_invalid);

  __mfc_tag_table = spu_or (__mfc_tag_table, mask);

  return spu_extract (is_invalid, 0);
}
 
/float_unssidf.c
0,0 → 1,45
/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_intrinsics.h>
/* Byte-shuffle control word handed to si_shufb by __float_unssidf.
   Following the SPU shufb encoding, values 0x00-0x0f pick a byte from
   the first shuffle operand, 0x10-0x1f pick a byte from the second,
   and 0x80 produces a zero byte.  Each 8-byte half therefore takes two
   bytes from the first operand, four bytes from the second, and two
   zero bytes.  */
const unsigned char __sidf_pat[16] __attribute__ ((__aligned__ (16))) = {
0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x80, 0x80,
0x06, 0x07, 0x14, 0x15,
0x16, 0x17, 0x80, 0x80
};
 
/* double __float_unssidf (unsigned int SI)

   Convert an unsigned 32-bit integer to double.  The value is
   normalized with a leading-zero count, the biased exponent is derived
   from that count, and exponent and fraction are then packed with a
   byte shuffle followed by a 4-bit shift.  */
qword __float_unssidf (qword SI);

qword
__float_unssidf (qword SI)
{
  /* Number of leading zero bits; 32 means the input word is zero.  */
  qword lead_zeros = si_clz (SI);
  qword input_is_zero = si_ceqi (lead_zeros, 32);

  /* Move the most significant set bit to the top of the word, then
     add the value to itself (one more left shift) to drop that
     implicit leading one.  */
  qword normalized = si_shl (SI, lead_zeros);
  qword fraction = si_a (normalized, normalized);

  /* Biased exponent: 1054 - lead_zeros, where 1054 = 1023 + 31.
     Forced to zero when the input is zero.  */
  qword exponent = si_sf (lead_zeros, si_il (1054));
  qword packed;

  exponent = si_andc (exponent, input_is_zero);

  packed = si_shufb (exponent, fraction, *(const qword *) __sidf_pat);
  return si_shlqbii (packed, 4);
}
/float_unsdisf.c
0,0 → 1,31
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
/* Prototype.  */
float __floatundisf (unsigned long long x);

/* Convert the unsigned 64-bit integer X to float.

   The SPU back-end now generates inline code for this conversion.
   This file is solely used to provide the __floatundisf function
   for objects generated with prior versions of GCC.  */
float
__floatundisf (unsigned long long x)
{
  float converted = (float) x;

  return converted;
}
/float_unsdidf.c
0,0 → 1,54
/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_intrinsics.h>
/* Per-word exponent constants that __float_unsdidf subtracts the
   leading-zero counts from.  Read as big-endian 32-bit words:
   word 0 = 0x0000043e (1086 = 1023 + 63), word 1 = 0x0000041e
   (1054 = 1023 + 31), words 2 and 3 are zero.  */
const unsigned char __didf_scale[16] __attribute__ ((__aligned__ (16))) = {
0x00, 0x00, 0x04, 0x3e,
0x00, 0x00, 0x04, 0x1e,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00
};
/* Byte-shuffle control word handed to si_shufb by __float_unsdidf.
   Following the SPU shufb encoding, values 0x00-0x0f pick a byte from
   the first shuffle operand, 0x10-0x1f pick a byte from the second,
   and 0x80 produces a zero byte.  */
const unsigned char __didf_pat[16] __attribute__ ((__aligned__ (16))) = {
0x02, 0x03, 0x10, 0x11,
0x12, 0x13, 0x80, 0x80,
0x06, 0x07, 0x14, 0x15,
0x16, 0x17, 0x80, 0x80
};
 
/* double __float_unsdidf (unsigned long long int)

   Build two exact doubles in parallel, one for the high 32-bit word
   and one for the low 32-bit word of the input, then add them to get
   the converted value.  */
qword __float_unsdidf (qword DI);

qword
__float_unsdidf (qword DI)
{
  /* Per-word leading-zero counts; 32 marks a word that is zero.  */
  qword lead_zeros = si_clz (DI);
  qword word_is_zero = si_ceqi (lead_zeros, 32);

  /* Normalize each word, then add it to itself (one more left shift)
     to drop the implicit leading one.  */
  qword normalized = si_shl (DI, lead_zeros);
  qword fraction = si_a (normalized, normalized);

  /* Biased exponents: __didf_scale minus the leading-zero counts,
     cleared for words that are zero.  */
  qword exponent = si_sf (lead_zeros, *(const qword *) __didf_scale);
  qword packed, both_halves, low_half;

  exponent = si_andc (exponent, word_is_zero);

  packed = si_shufb (exponent, fraction, *(const qword *) __didf_pat);
  both_halves = si_shlqbii (packed, 4);

  /* Bring the second double into the first slot and sum the pair.  */
  low_half = si_shlqbyi (both_halves, 8);
  return si_dfa (both_halves, low_half);
}
/divmodti4.c
0,0 → 1,188
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_intrinsics.h>
 
/* 128-bit (TImode) unsigned and signed integer types.  */
typedef unsigned int UTItype __attribute__ ((mode (TI)));
typedef int TItype __attribute__ ((mode (TI)));
/* Entry points defined in this file.  */
TItype __divti3 (TItype u, TItype v);
TItype __modti3 (TItype u, TItype v);
UTItype __udivti3 (UTItype u, UTItype v);
UTItype __umodti3 (UTItype u, UTItype v);
UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
 
/* Overlay used to reinterpret a 128-bit integer as a qword register
   value and back (see si_from_UTItype / si_to_UTItype).  */
union qword_UTItype
{
qword q;
UTItype t;
};
/* Reinterpret the 128-bit integer T as a qword, bit for bit.  */
inline static qword
si_from_UTItype (UTItype t)
{
  union qword_UTItype overlay;

  overlay.t = t;
  return overlay.q;
}
 
/* Reinterpret the qword Q as a 128-bit unsigned integer, bit for bit.  */
inline static UTItype
si_to_UTItype (qword q)
{
  union qword_UTItype overlay;

  overlay.q = q;
  return overlay.t;
}
 
/* Count the leading zero bits of the 128-bit value X.

   The per-word counts are accumulated into word 0: a lower word's
   count is added only when every more significant word is zero.  */
inline static unsigned int
count_leading_zeros (UTItype x)
{
  /* Leading-zero count of each of the four 32-bit words.  */
  qword word_clz = si_clz (*(qword *) & x);

  /* All ones in every word whose count exceeds 31, i.e. that is zero.  */
  qword word_is_zero = si_cgti (word_clz, 31);

  /* In word 0: "words 0 and 1 are zero" and "words 0 through 2 are
     zero".  The byte shifts bring the lower words' flags up.  */
  qword top_two_zero = si_and (word_is_zero, si_shlqbyi (word_is_zero, 4));
  qword top_three_zero = si_and (top_two_zero, si_shlqbyi (word_is_zero, 8));

  qword total = si_a (word_clz,
                      si_and (word_is_zero, si_shlqbyi (word_clz, 4)));

  total = si_a (total, si_and (top_two_zero, si_shlqbyi (word_clz, 8)));
  total = si_a (total, si_and (top_three_zero, si_shlqbyi (word_clz, 12)));

  return si_to_uint (total);
}
 
/* Unsigned 128-bit divide: returns NUM / DEN and, when RP is non-null,
stores NUM % DEN in *RP. This is a shift-and-subtract loop; no explicit
check for DEN == 0 is made here.

Based on implementation of udivmodsi4, which is essentially
* an optimized version of libgcc/udivmodsi4.c
clz %7,%2
clz %4,%1
il %5,1
fsmbi %0,0
sf %7,%4,%7
ori %3,%1,0
shl %5,%5,%7
shl %4,%2,%7
1: or %8,%0,%5
rotmi %5,%5,-1
clgt %6,%4,%3
sf %7,%4,%3
rotmi %4,%4,-1
selb %0,%8,%0,%6
selb %3,%7,%3,%6
3: brnz %5,1b
*/

UTItype
__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
{
/* Number of bit positions the divisor must move left so that its
leading one lines up with the dividend's. */
qword shift =
si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
/* n0: running remainder, d0: shifted divisor, bit: quotient bit that
corresponds to the current divisor position, r0: quotient so far. */
qword n0 = si_from_UTItype (num);
qword d0 = si_from_UTItype (den);
qword bit = si_andi (si_fsmbi (1), 1);
qword r0 = si_il (0);
/* m1 is subtracted from the shifted borrow vector in the subtraction
below. NOTE(review): presumably this seeds the least significant word
with "no incoming borrow" -- confirm against the SPU ISA. */
qword m1 = si_fsmbi (0x000f);
qword mask, r1, n1;

/* 128-bit left shift by SHIFT, done as a bit shift followed by a byte
shift; the divisor and the quotient bit move together. */
d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
bit = si_shlqbybi (si_shlqbi (bit, shift), shift);

do
{
/* Candidate quotient with the current bit set. */
r1 = si_or (r0, bit);

// n1 = n0 - d0 in TImode
// NOTE(review): the bg/bgx/sfx chain appears to carry the borrow up one
// 32-bit word per step (hence the 4-byte shifts) -- confirm.
n1 = si_bg (d0, n0);
n1 = si_shlqbyi (n1, 4);
n1 = si_sf (m1, n1);
n1 = si_bgx (d0, n0, n1);
n1 = si_shlqbyi (n1, 4);
n1 = si_sf (m1, n1);
n1 = si_bgx (d0, n0, n1);
n1 = si_shlqbyi (n1, 4);
n1 = si_sf (m1, n1);
n1 = si_sfx (d0, n0, n1);

/* If the difference is non-negative (first word > -1) the subtraction
is accepted: keep the candidate quotient and the reduced remainder.
Otherwise both stay unchanged. */
mask = si_fsm (si_cgti (n1, -1));
r0 = si_selb (r0, r1, mask);
n0 = si_selb (n0, n1, mask);
/* Step to the next lower quotient bit. */
bit = si_rotqmbii (bit, -1);
d0 = si_rotqmbii (d0, -1);
}
while (si_to_uint (si_orx (bit)));
if (rp)
*rp = si_to_UTItype (n0);
return si_to_UTItype (r0);
}
 
/* Unsigned 128-bit division: return N / D.  The remainder is not
   requested from __udivmodti4.  */
UTItype
__udivti3 (UTItype n, UTItype d)
{
  UTItype quotient = __udivmodti4 (n, d, (UTItype *)0);

  return quotient;
}
 
/* Unsigned 128-bit remainder: return N % D.  The quotient returned by
   __udivmodti4 is discarded.  */
UTItype
__umodti3 (UTItype n, UTItype d)
{
  UTItype remainder;

  (void) __udivmodti4 (n, d, &remainder);
  return remainder;
}
 
/* Signed 128-bit division: return N / D.

   Both operands are made non-negative, divided as unsigned values,
   and the quotient is negated when exactly one operand was negative.  */
TItype
__divti3 (TItype n, TItype d)
{
  int negate_result = 0;
  TItype quotient;

  if (n < 0)
    {
      negate_result = !negate_result;
      n = -n;
    }
  if (d < 0)
    {
      negate_result = !negate_result;
      d = -d;
    }

  quotient = __udivmodti4 (n, d, (UTItype *)0);
  return negate_result ? -quotient : quotient;
}
 
/* Signed 128-bit remainder: return N % D.

   Both operands are made non-negative and divided as unsigned values.
   The remainder takes the sign of the dividend N only, as C requires
   for the % operator (7 % -2 == 1, -7 % -2 == -1).  The sign of the
   divisor D must not affect the sign of the result.  */
TItype
__modti3 (TItype n, TItype d)
{
  int negate_result = 0;
  TItype w;

  if (n < 0)
    {
      negate_result = 1;
      n = -n;
    }
  /* Only the magnitude of the divisor matters.  */
  if (d < 0)
    d = -d;

  __udivmodti4 (n, d, (UTItype *) &w);
  if (negate_result)
    w = -w;
  return w;
}
/cache.S
0,0 → 1,43
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
/* Storage for the software cache.  __CACHE_SIZE__ must be defined by
   the preprocessor (e.g. -D__CACHE_SIZE__=N on the compile line).  */
.data
.p2align 7
.global __cache
/* Cache data area, aligned to 128 bytes: __CACHE_SIZE__ * 8 blocks of
   128 zero bytes, i.e. __CACHE_SIZE__ * 1024 bytes in total.  */
__cache:
.rept __CACHE_SIZE__ * 8
.fill 128
.endr

.p2align 7
.global __cache_tag_array
/* Tag array, aligned to 128 bytes: __CACHE_SIZE__ * 2 records of 128
   bytes each.  Every record starts with four 32-bit words holding 1,
   followed by 112 zero bytes.  (Presumably the 1s are the initial
   "empty" tag values expected by the cache manager -- confirm against
   cachemgr.c.)  */
__cache_tag_array:
.rept __CACHE_SIZE__ * 2
.long 1, 1, 1, 1
.fill 128-16
.endr
__end_cache_tag_array:

/* Absolute symbol whose value is the size of the tag array in bytes.  */
.globl __cache_tag_array_size
.set __cache_tag_array_size, __end_cache_tag_array-__cache_tag_array
 
/mfc_multi_tag_reserve.c
0,0 → 1,84
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_mfcio.h>
extern vector unsigned int __mfc_tag_table;
 
/* Reserve a sequential group of tags for exclusive use. The number of
tags to be reserved is specified by the <number_of_tags> parameter.
This routine returns the first tag ID for a sequential list of
available tags and marks them as reserved. The reserved group
of tags is in the range starting from the returned tag through
the returned tag + <number_of_tags>-1.
 
If the number of tags requested exceeds the number of available
sequential tags, then MFC_DMA_TAG_INVALID is returned indicating
that the request could not be serviced. */
 
unsigned int
__mfc_multi_tag_reserve (unsigned int number_of_tags)
{
vector unsigned int table_copy;
vector unsigned int one = (vector unsigned int)
{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
vector unsigned int count_busy, is_valid;
vector unsigned int count_total;
vector unsigned int count_avail = (vector unsigned int) { 0, 0, 0, 0 };
vector unsigned int index = (vector unsigned int) { 0, 0, 0, 0 };
 
table_copy = __mfc_tag_table;
 
 
/* count_busy: number of consecutive busy tags
count_avail: number of consecutive free tags
table_copy: temporary copy of the tag table
count_total: sum of count_busy and count_avail
index: index of the current working tag */
do
{
table_copy = spu_sl (table_copy, count_avail);
 
count_busy = spu_cntlz (table_copy);
table_copy = spu_sl (table_copy, count_busy);
count_avail = spu_cntlz (spu_xor(table_copy, -1));
count_total = spu_add (count_busy, count_avail);
index = spu_add (index, count_total);
}
while (spu_extract (count_avail, 0) < number_of_tags
&& spu_extract (table_copy, 0) != 0);
 
index = spu_sub (index, count_avail);
 
/* is_valid is set to 0xFFFFFFFF if table_copy == 0, 0 otherwise. */
is_valid = spu_cmpeq (table_copy, 0);
index = spu_sel (index, is_valid, is_valid);
 
/* Now I need to actually mark the tags as used. */
table_copy = spu_sl (one, number_of_tags);
table_copy = spu_rl (table_copy, -number_of_tags - spu_extract (index, 0));
table_copy = spu_sel (table_copy, __mfc_tag_table, table_copy);
__mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_valid);
 
return spu_extract (index, 0);
}
 
/t-elf
0,0 → 1,59
# Don't let CTOR_LIST end up in sdata section.
# FIXME: This is the default.
CRTSTUFF_T_CFLAGS =

# We exclude those because the libgcc2.c default versions do not support
# the SPU single-precision format (round towards zero). We provide our
# own versions below and/or via direct expansion.
LIB2FUNCS_EXCLUDE = _floatdisf _floatundisf _floattisf _floatunstisf

# SPU-specific sources: integer/float conversions, the MFC tag manager,
# TImode multiply/divide and the vector double-precision divide.
LIB2ADD_ST = $(srcdir)/config/spu/float_unssidf.c \
$(srcdir)/config/spu/float_unsdidf.c \
$(srcdir)/config/spu/float_unsdisf.c \
$(srcdir)/config/spu/float_disf.c \
$(srcdir)/config/spu/mfc_tag_table.c \
$(srcdir)/config/spu/mfc_tag_reserve.c \
$(srcdir)/config/spu/mfc_tag_release.c \
$(srcdir)/config/spu/mfc_multi_tag_reserve.c \
$(srcdir)/config/spu/mfc_multi_tag_release.c \
$(srcdir)/config/spu/multi3.c \
$(srcdir)/config/spu/divmodti4.c \
$(srcdir)/config/spu/divv2df3.c

# Build TImode conversion routines to support Fortran 128-bit
# integer data types.
LIB2_SIDITI_CONV_FUNCS = yes

HOST_LIBGCC2_CFLAGS += -mwarn-reloc -D__IN_LIBGCC2

# Neither gcc nor newlib seems to have a standard way to generate multiple
# crt*.o files. So we don't use the standard crt0.o name anymore.

# Software cache manager, default build.
cachemgr.o: $(srcdir)/config/spu/cachemgr.c
$(gcc_compile) -c $<

# Specialised rule to add a -D flag.
cachemgr_nonatomic.o: $(srcdir)/config/spu/cachemgr.c
$(gcc_compile) -DNONATOMIC -c $<

# Wrap a single object file into an archive named libgcc_<name>.a.
libgcc_%.a: %.o
$(AR_FOR_TARGET) -rcs $@ $<

# One cache storage object per supported size; the size is passed to
# cache.S through __CACHE_SIZE__.
cache8k.o: $(srcdir)/config/spu/cache.S
$(gcc_compile) -D__CACHE_SIZE__=8 -c $<

cache16k.o: $(srcdir)/config/spu/cache.S
$(gcc_compile) -D__CACHE_SIZE__=16 -c $<

cache32k.o: $(srcdir)/config/spu/cache.S
$(gcc_compile) -D__CACHE_SIZE__=32 -c $<

cache64k.o: $(srcdir)/config/spu/cache.S
$(gcc_compile) -D__CACHE_SIZE__=64 -c $<

cache128k.o: $(srcdir)/config/spu/cache.S
$(gcc_compile) -D__CACHE_SIZE__=128 -c $<

# We provide our own version of __divdf3 that performs better and has
# better support for non-default rounding modes.
DPBIT_FUNCS := $(filter-out _div_df, $(DPBIT_FUNCS))
/mfc_tag_table.c
0,0 → 1,39
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
/* The free tag table used by the MFC tag manager, with tag0
reserved for the overlay manager. A set bit marks a free tag; tag0 is
the most significant bit of the first word, which starts out cleared
(0x7FFFFFFF). */
__vector unsigned int
__mfc_tag_table = (__vector unsigned int) { 0x7FFFFFFF, -1, -1, -1 };
 
/* Arrange to release tag0 if overlays are not present.  Runs as a
   constructor, before main.  */
static void __mfc_tag_init (void) __attribute__ ((constructor));

static void
__mfc_tag_init (void)
{
  /* Weak reference: its address is null when no overlay table has
     been linked in.  */
  extern void _ovly_table __attribute__ ((weak));

  if (&_ovly_table != 0)
    return;

  /* No overlays: every tag, including tag0, starts out free.  */
  __mfc_tag_table = (__vector unsigned int) { -1, -1, -1, -1 };
}
/divv2df3.c
0,0 → 1,195
/* Copyright (C) 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_intrinsics.h>
 
vector double __divv2df3 (vector double a_in, vector double b_in);

/* __divv2df3 divides the vector dividend a by the vector divisor b and
returns the resulting vector quotient. Maximum error about 0.5 ulp
over entire double range including denorms, compared to true result
in round-to-nearest rounding mode. Handles Inf or NaN operands and
results correctly.

Outline: classify the special operands, scale denormal inputs into the
normal range, divide the mantissas using a reciprocal estimate refined
by Newton-Raphson steps, then apply the exponent through one final
multiplication. */

vector double
__divv2df3 (vector double a_in, vector double b_in)
{
/* Variables */
vec_int4 exp, exp_bias;
vec_uint4 no_underflow, overflow;
vec_float4 mant_bf, inv_bf;
vec_ullong2 exp_a, exp_b;
vec_ullong2 a_nan, a_zero, a_inf, a_denorm, a_denorm0;
vec_ullong2 b_nan, b_zero, b_inf, b_denorm, b_denorm0;
vec_ullong2 nan;
vec_uint4 a_exp, b_exp;
vec_ullong2 a_mant_0, b_mant_0;
vec_ullong2 a_exp_1s, b_exp_1s;
vec_ullong2 sign_exp_mask;

vec_double2 a, b;
vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult;

/* Constants */
vec_uint4 exp_mask_u32 = spu_splats((unsigned int)0x7FF00000);
/* splat_hi copies the high 32-bit word of each double into both words
of that double; swap_32 exchanges the two words of each double. */
vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8, 9,10,11, 8,9,10,11};
vec_uchar16 swap_32 = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
vec_ullong2 sign_mask = spu_splats(0x8000000000000000ULL);
vec_float4 onef = spu_splats(1.0f);
vec_double2 one = spu_splats(1.0);
/* Bit pattern with exponent field 0x035 (decimal 53); used below to
rescale denormal inputs. */
vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL);

sign_exp_mask = spu_or(sign_mask, exp_mask);

/* Extract the floating point components from each of the operands including
* exponent and mantissa.
*/
a_exp = (vec_uint4)spu_and((vec_uint4)a_in, exp_mask_u32);
a_exp = spu_shuffle(a_exp, a_exp, splat_hi);
b_exp = (vec_uint4)spu_and((vec_uint4)b_in, exp_mask_u32);
b_exp = spu_shuffle(b_exp, b_exp, splat_hi);

/* a_mant_0 / b_mant_0: all ones in a double whose mantissa bits are all
zero. The word-wise compare is AND-ed with its word-swapped copy so
that both halves must be zero. */
a_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)a_in, sign_exp_mask), 0);
a_mant_0 = spu_and(a_mant_0, spu_shuffle(a_mant_0, a_mant_0, swap_32));

b_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)b_in, sign_exp_mask), 0);
b_mant_0 = spu_and(b_mant_0, spu_shuffle(b_mant_0, b_mant_0, swap_32));

/* All ones where the exponent field is all ones (Inf or NaN). */
a_exp_1s = (vec_ullong2)spu_cmpeq(a_exp, exp_mask_u32);
b_exp_1s = (vec_ullong2)spu_cmpeq(b_exp, exp_mask_u32);

/* Identify all possible special values that must be accommodated including:
* +-denorm, +-0, +-infinity, and NaNs.
*/
a_denorm0= (vec_ullong2)spu_cmpeq(a_exp, 0);
a_nan = spu_andc(a_exp_1s, a_mant_0);
a_zero = spu_and (a_denorm0, a_mant_0);
a_inf = spu_and (a_exp_1s, a_mant_0);
a_denorm = spu_andc(a_denorm0, a_zero);

b_denorm0= (vec_ullong2)spu_cmpeq(b_exp, 0);
b_nan = spu_andc(b_exp_1s, b_mant_0);
b_zero = spu_and (b_denorm0, b_mant_0);
b_inf = spu_and (b_exp_1s, b_mant_0);
b_denorm = spu_andc(b_denorm0, b_zero);

/* Scale denorm inputs into normalized numbers by conditionally scaling the
* input parameters.
*/
a = spu_sub(spu_or(a_in, exp_53), spu_sel(exp_53, a_in, sign_mask));
a = spu_sel(a_in, a, a_denorm);

b = spu_sub(spu_or(b_in, exp_53), spu_sel(exp_53, b_in, sign_mask));
b = spu_sel(b_in, b, b_denorm);

/* Extract the divisor and dividend exponent and force parameters into the signed
* range [1.0,2.0) or (-2.0,-1.0].
*/
exp_a = spu_and((vec_ullong2)a, exp_mask);
exp_b = spu_and((vec_ullong2)b, exp_mask);

/* Keep sign and mantissa, replace the exponent field with that of 1.0. */
mant_a = spu_sel(a, one, (vec_ullong2)exp_mask);
mant_b = spu_sel(b, one, (vec_ullong2)exp_mask);
/* Approximate the single reciprocal of b by using
* the single precision reciprocal estimate followed by one
* single precision iteration of Newton-Raphson.
*/
mant_bf = spu_roundtf(mant_b);
inv_bf = spu_re(mant_bf);
inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);

/* Perform 2 more Newton-Raphson iterations in double precision. The
* result (q1) is in the range (0.5, 2.0).
*/
inv_b = spu_extend(inv_bf);
inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
q0 = spu_mul(mant_a, inv_b);
q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);

/* Determine the exponent correction factor that must be applied
* to q1 by taking into account the exponent of the normalized inputs
* and the scale factors that were applied to normalize them.
*/
exp = spu_rlmaska(spu_sub((vec_int4)exp_a, (vec_int4)exp_b), -20);
exp = spu_add(exp, (vec_int4)spu_add(spu_and((vec_int4)a_denorm, -0x34), spu_and((vec_int4)b_denorm, 0x34)));
/* Bias the quotient exponent depending on the sign of the exponent correction
* factor so that a single multiplier will ensure the entire double precision
* domain (including denorms) can be achieved.
*
* exp        bias q1    adjust exp
* =====      ========   ==========
* positive   2^+65      -65
* negative   2^-64      +64
*
* NOTE(review): the code below computes exp_bias as 64 when exp is
* non-negative and as -65 when exp is negative, so the magnitudes in
* the table above look swapped -- confirm.
*/
exp_bias = spu_xor(spu_rlmaska(exp, -31), 64);
exp = spu_sub(exp, exp_bias);

q1 = spu_sel(q1, (vec_double2)spu_add((vec_int4)q1, spu_sl(exp_bias, 20)), exp_mask);

/* Compute a multiplier (mult) to be applied to the quotient (q1) to produce the
* expected result. On overflow, clamp the multiplier to the maximum non-infinite
* number in case the rounding mode is not round-to-nearest.
*/
exp = spu_add(exp, 0x3FF);
no_underflow = spu_cmpgt(exp, 0);
overflow = spu_cmpgt(exp, 0x7FE);
exp = spu_and(spu_sl(exp, 20), (vec_int4)no_underflow);
exp = spu_and(exp, (vec_int4)exp_mask);

mult = spu_sel((vec_double2)exp, (vec_double2)(spu_add((vec_uint4)exp_mask, -1)), (vec_ullong2)overflow);

/* Handle special value conditions. These include:
*
* 1) IF either operand is a NaN OR both operands are 0 or INFINITY THEN a NaN
* results.
* 2) ELSE IF the dividend is an INFINITY OR the divisor is 0 THEN an INFINITY results.
* 3) ELSE IF the dividend is 0 OR the divisor is INFINITY THEN a 0 results.
*/
mult = spu_andc(mult, (vec_double2)spu_or(a_zero, b_inf));
mult = spu_sel(mult, (vec_double2)exp_mask, spu_or(a_inf, b_zero));

nan = spu_or(a_nan, b_nan);
nan = spu_or(nan, spu_and(a_zero, b_zero));
nan = spu_or(nan, spu_and(a_inf, b_inf));

mult = spu_or(mult, (vec_double2)nan);

/* Scale the final quotient */

q2 = spu_mul(q1, mult);

return (q2);
}
 
 
/* We use the same function for vector and scalar division. Provide the
scalar entry point as an alias of __divv2df3. */
double __divdf3 (double a, double b)
__attribute__ ((__alias__ ("__divv2df3")));

/* Some toolchain builds used the __fast_divdf3 name for this helper function.
Provide this as another alternate entry point for compatibility; it is
also an alias of __divv2df3. */
double __fast_divdf3 (double a, double b)
__attribute__ ((__alias__ ("__divv2df3")));
 
/cachemgr.c
0,0 → 1,438
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_mfcio.h>
#include <spu_internals.h>
#include <spu_intrinsics.h>
#include <spu_cache.h>
 
/* Compared against effective addresses in __cache_miss to detect
addresses that fall inside the SPU local store. */
extern unsigned long long __ea_local_store;
/* Only the ADDRESS of this symbol is used: it is cast to an integer and
read as the size of the tag array in bytes. */
extern char __cache_tag_array_size;

/* Size of one cache line in bytes, and the mask of the byte-offset
bits within a line. */
#define LINE_SIZE 128
#define TAG_MASK (LINE_SIZE - 1)

/* Number of ways (slots) per tag-array entry. */
#define WAYS 4
/* Tag-array size minus one line; AND-ed with an address to obtain a
byte offset into the tag array (see GET_ENTRY). */
#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)

/* Total number of cache lines: tag-array entries times WAYS. */
#define CACHE_LINES ((int) &__cache_tag_array_size / \
sizeof (struct __cache_tag_array) * WAYS)
 
/* One entry of the tag array, describing WAYS cache lines. All members
are indexed by way. */
struct __cache_tag_array
{
/* Low 32 bits of the line's tag; the value 1 marks an empty way
(see SET_EMPTY / CHECK_EMPTY). */
unsigned int tag_lo[WAYS];
/* High 32 bits of the tag; only used by the __EA64__ macros. */
unsigned int tag_hi[WAYS];
/* Pointer associated with the way; set in __cache_miss. */
void *base[WAYS];
/* LS_FLAG in the top bit, LRU value in the remaining bits
(see SET_IS_LS / GET_LRU). */
int reserved[WAYS];
/* Dirty bitmap of the line; consumed 16 bits at a time when the line
is written back in __cache_evict_entry. */
vector unsigned short dirty_bits[WAYS];
};
 
/* Tag array and cache line storage; both are defined outside this
file. */
extern struct __cache_tag_array __cache_tag_array[];
extern char __cache[];
 
/* In order to make the code seem a little cleaner, and to avoid having
64/32 bit ifdefs all over the place, we use macros. */

#ifdef __EA64__
/* 64-bit effective addresses: the tag is split across tag_hi/tag_lo. */
typedef unsigned long long addr;

/* True if way _way of _entry holds tag _tag. */
#define CHECK_TAG(_entry, _way, _tag) \
((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF) \
&& (_entry)->tag_hi[(_way)] == ((_tag) >> 32))

/* Reassemble the 64-bit tag stored in way _way of _entry. */
#define GET_TAG(_entry, _way) \
((unsigned long long)(_entry)->tag_hi[(_way)] << 32 \
| (unsigned long long)(_entry)->tag_lo[(_way)])

/* Store _tag into way _way of _entry. NOTE: this expands to two
statements, so it must not be used as the unbraced body of an
if/else or loop. */
#define SET_TAG(_entry, _way, _tag) \
(_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF; \
(_entry)->tag_hi[(_way)] = (_tag) >> 32

#else /*__EA32__*/
/* 32-bit effective addresses: only tag_lo is used. */
typedef unsigned long addr;

/* True if way _way of _entry holds tag _tag. */
#define CHECK_TAG(_entry, _way, _tag) \
((_entry)->tag_lo[(_way)] == (_tag))

/* Tag stored in way _way of _entry. */
#define GET_TAG(_entry, _way) \
((_entry)->tag_lo[(_way)])

/* Store _tag into way _way of _entry. */
#define SET_TAG(_entry, _way, _tag) \
(_entry)->tag_lo[(_way)] = (_tag)

#endif
 
/* In GET_ENTRY, we cast away the high 32 bits,
   as the tag is only in the low 32.  */

/* Tag-array entry for effective address _addr: the address is masked
   with SET_MASK and the result is added, as a byte offset, to the
   start of __cache_tag_array.  */
#define GET_ENTRY(_addr) \
  ((struct __cache_tag_array *) \
   si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
                             si_from_uint (SET_MASK)), \
                     si_from_uint ((unsigned int) __cache_tag_array))))

/* Address of the cache line for way _way of the set selected by
   _addr.  This is a plain expression: it deliberately carries no
   trailing semicolon, so it can be used inside larger expressions and
   does not leave an empty statement behind each use.  */
#define GET_CACHE_LINE(_addr, _way) \
  ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE))

/* Non-zero if any bit of the dirty bitmap _vec is set.  */
#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))

/* A low tag of 1 marks a way as empty.  */
#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)

/* The top bit of reserved[] flags a way as referring to the local
   store; the remaining bits hold the LRU value.  */
#define LS_FLAG 0x80000000
#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
 
/* DMA tag used for cache line transfers. The initial value 32 means
"not reserved yet"; __cache_fill reserves a real tag on first use. */
static int dma_tag = 32;
 
/* Evict way WAY of tag-array entry ENTRY.

If the line has dirty bits set and is not flagged as a local-store
line, its contents are written back first: with NONATOMIC defined, as
one DMA put of the whole line; otherwise by atomically merging only the
dirty bytes into the current memory contents. In every case the way is
then marked empty and its dirty bits are cleared. */
static void
__cache_evict_entry (struct __cache_tag_array *entry, int way)
{
addr tag = GET_TAG (entry, way);

if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
{
#ifdef NONATOMIC
/* Non-atomic writes. */
unsigned int oldmask, mach_stat;
char *line = ((void *) 0);

/* Enter critical section. */
mach_stat = spu_readch (SPU_RdMachStat);
spu_idisable ();

/* Issue DMA request. */
line = GET_CACHE_LINE (entry->tag_lo[way], way);
mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);

/* Wait for DMA completion. The caller's tag mask is saved and
restored around the wait.
NOTE(review): 1 << dma_tag is a signed shift, which is undefined for
dma_tag == 31 -- consider 1U. */
oldmask = mfc_read_tag_mask ();
mfc_write_tag_mask (1 << dma_tag);
mfc_read_tag_status_all ();
mfc_write_tag_mask (oldmask);

/* Leave critical section. Interrupts are re-enabled only when bit 0
of the saved machine status was set (presumably "interrupts were
enabled on entry" -- confirm). */
if (__builtin_expect (mach_stat & 1, 0))
spu_ienable ();
#else
/* Allocate a buffer large enough that we know it has 128 bytes
that are 128 byte aligned (for DMA). */

char buffer[LINE_SIZE + 127];
qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
qword bits;
unsigned int mach_stat;

/* Enter critical section. */
mach_stat = spu_readch (SPU_RdMachStat);
spu_idisable ();

do
{
/* We atomically read the current memory into a buffer
modify the dirty bytes in the buffer, and write it
back. If writeback fails, loop and try again. */

mfc_getllar (buf_ptr, tag, 0, 0);
mfc_read_atomic_status ();

/* The method we're using to write 16 dirty bytes into
the buffer at a time uses fsmb which in turn uses
the least significant 16 bits of word 0, so we
load the bits and rotate so that the first bit of
the bitmap is in the first bit that fsmb will use. */

bits = (qword) entry->dirty_bits[way];
bits = si_rotqbyi (bits, -2);

/* Si_fsmb creates the mask of dirty bytes.
Use selb to nab the appropriate bits. */
buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));

/* Rotate to next 16 byte section of cache. */
bits = si_rotqbyi (bits, 2);

/* Same merge for each of the remaining seven 16-byte sections of the
line. */
buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);
buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
bits = si_rotqbyi (bits, 2);

/* Conditional store; a non-zero atomic status below repeats the
whole read-merge-write sequence. */
mfc_putllc (buf_ptr, tag, 0, 0);
}
while (mfc_read_atomic_status ());

/* Leave critical section. Interrupts are re-enabled only when bit 0
of the saved machine status was set (presumably "interrupts were
enabled on entry" -- confirm). */
if (__builtin_expect (mach_stat & 1, 0))
spu_ienable ();
#endif
}

/* In any case, marking the lo tag with 1 which denotes empty. */
SET_EMPTY (entry, way);
entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
}
 
/* Evict the cache line holding effective address EA, if present.
   Every way of the set that EA maps to is examined, and each way
   whose tag matches is handed to __cache_evict_entry.  */
void
__cache_evict (__ea void *ea)
{
  struct __cache_tag_array *set = GET_ENTRY (ea);
  addr line_tag = (addr) ea & ~TAG_MASK;
  int way;

  for (way = 0; way < WAYS; way++)
    {
      if (!CHECK_TAG (set, way, line_tag))
        continue;

      __cache_evict_entry (set, way);
    }
}
 
/* Fetch the LINE_SIZE bytes at effective address TAG into the local
   cache line selected by (TAG, WAY) and block until the transfer has
   completed.  Returns the local address of that cache line.  */
static void *
__cache_fill (int way, addr tag)
{
  unsigned int saved_tag_mask;
  unsigned int machine_status;
  char *ls_line;

  /* dma_tag stays at 32 (outside the 0..31 tag range) until a tag
     has been reserved; reserve one on first use.  */
  if (dma_tag == 32)
    dma_tag = mfc_tag_reserve ();

  /* Remember the machine status, then disable interrupts for the
     duration of the transfer.  */
  machine_status = spu_readch (SPU_RdMachStat);
  spu_idisable ();

  /* Start the transfer into the selected cache line.  */
  ls_line = GET_CACHE_LINE (tag, way);
  mfc_get (ls_line, tag, LINE_SIZE, dma_tag, 0, 0);

  /* Wait on our tag only, restoring the caller's tag mask
     afterwards.  */
  saved_tag_mask = mfc_read_tag_mask ();
  mfc_write_tag_mask (1 << dma_tag);
  mfc_read_tag_status_all ();
  mfc_write_tag_mask (saved_tag_mask);

  /* Re-enable interrupts only if bit 0 of the saved machine status
     was set on entry.  */
  if (__builtin_expect (machine_status & 1, 0))
    spu_ienable ();

  return (void *) ls_line;
}
 
/* Install the line containing EA into the set ENTRY after a lookup
   miss.  WAY is the slot picked by the caller; a value of 4 or more
   means no slot was empty, in which case the way with the largest
   LRU count is evicted and reused.  */
static void
__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
{
  addr line_tag = (addr) ea & ~TAG_MASK;
  unsigned int oldest = 0;
  int victim = 0;
  int w;

  if (way >= 4)
    {
      /* The strict comparison keeps the lowest-numbered way on ties,
         and way 0 when every LRU count is zero.  */
      for (w = 0; w < WAYS; w++)
        if (GET_LRU (entry, w) > oldest)
          {
            oldest = GET_LRU (entry, w);
            victim = w;
          }

      __cache_evict_entry (entry, victim);
      way = victim;
    }

  /* Claim the slot: record its tag and clear its reserved word.  */
  SET_TAG (entry, way, line_tag);
  entry->reserved[way] = 0;

  /* The local store is not 256k aligned, so membership cannot be
     tested with a mask; compare against both ends of the 0x40000
     byte window starting at __ea_local_store instead.  */
  if ((addr) ea >= (addr) __ea_local_store
      && (addr) ea < (addr) (__ea_local_store + 0x40000))
    {
      /* EA refers to the local store itself: no transfer is needed,
         the base is simply the 128-byte aligned offset into it.  */
      unsigned int ls_offset
        = (unsigned int) ((addr) ea - (addr) __ea_local_store);

      SET_IS_LS (entry, way);
      entry->base[way] = (void *) (ls_offset & ~0x7f);
    }
  else
    {
      entry->base[way] = __cache_fill (way, line_tag);
    }
}
 
/* Look up effective address EA in the software cache and return a
   local pointer to the corresponding byte.

   The set for EA is obtained with GET_ENTRY and its tags are compared
   against the tag of EA.  If no way matches, __cache_miss is called
   and the comparison is repeated.  On a hit, when N_BYTES_DIRTY is
   non-zero a mask derived from N_BYTES_DIRTY and the offset of EA
   within its line is ORed into the dirty bits of the matching way.
   The matching way's LRU count is then cleared and the returned
   pointer is the way's quadword-32 value plus (EA & TAG_MASK).

   NOTE(review): several operands are built with si_from_uint and then
   used as full quadwords (bit_mask, ~LS_FLAG).  This looks like it
   relies on every word of those quadwords holding the same value --
   confirm against the intrinsic's definition.  */
void *
__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
{
#ifdef __EA64__
  unsigned int tag_hi;
  qword etag_hi;
#endif
  unsigned int tag_lo;
  struct __cache_tag_array *entry;

  qword etag_lo;
  qword equal;
  qword bit_mask;
  qword way;

  /* This first chunk, we merely fill the pointer and tag.  */

  entry = GET_ENTRY (ea);

#ifndef __EA64__
  /* 32-bit addresses: the tag is the address with the TAG_MASK bits
     cleared.  */
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_uint ((addr) ea), si_from_uint (0),
                  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
#else
  /* 64-bit addresses: bytes 4-7 form the low tag (with the TAG_MASK
     bits cleared) and bytes 0-3 form the high tag.  */
  tag_lo =
    si_to_uint (si_andc
                (si_shufb
                 (si_from_ullong ((addr) ea), si_from_uint (0),
                  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));

  tag_hi =
    si_to_uint (si_shufb
                (si_from_ullong ((addr) ea), si_from_uint (0),
                 si_from_uint (0x00010203)));
#endif

  /* Increment LRU in reserved bytes.  This adds 1 to each word of the
     quadword at offset 48 and, being placed before the missreturn
     label, runs once per call even when a miss forces a retry.  */
  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
           si_from_ptr (entry), 48);

missreturn:
  /* Check if the entry's lo_tag is equal to the address' lo_tag.
     The four words at offset 0 are compared at once.  */
  etag_lo = si_lqd (si_from_ptr (entry), 0);
  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
#ifdef __EA64__
  /* And the high tag too.  */
  etag_hi = si_lqd (si_from_ptr (entry), 16);
  equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
#endif

  /* No word compared equal: none of the ways holds this line.  */
  if ((si_to_uint (si_orx (equal)) == 0))
    goto misshandler;

  if (n_bytes_dirty)
    {
      /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
         offset of the appropriate dirty bits.  */
      way = si_shli (si_clz (si_gbb (equal)), 2);

      /* To create the bit_mask, we set it to all 1s (uint -1), then we
         shift it over (128 - n_bytes_dirty) times.  The shift is split
         into a whole-byte part and a remaining-bit part.  */

      bit_mask = si_from_uint (-1);

      bit_mask =
        si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));

      bit_mask =
        si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));

      /* Rotate it around to the correct offset.  The amount comes from
         the offset of EA within its line, again split into a byte
         rotate followed by a bit rotate.  */
      bit_mask =
        si_rotqby (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));

      bit_mask =
        si_rotqbi (bit_mask,
                   si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));

      /* Update the dirty bits.  Existing dirty bits are kept; the new
         mask is ORed in.  */
      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
               si_from_ptr (entry), way);
    };

  /* We've definitely found the right entry, set LRU (reserved) to 0
     maintaining the LS flag (MSB).  Only the word selected by EQUAL
     is modified.  */

  si_stqd (si_andc
           (si_lqd (si_from_ptr (entry), 48),
            si_and (equal, si_from_uint (~(LS_FLAG)))),
           si_from_ptr (entry), 48);

  /* Select the matching way's word from the quadword at offset 32
     (presumably the base[] pointers -- confirm against the struct
     layout) and add the offset of EA within its line.  */
  return (void *)
    si_to_uint (si_a
                (si_orx
                 (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
                 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));

misshandler:
  /* Find a way whose low tag is 1, the value used to mark an empty
     way.  The expression passed as WAY is the index of the first such
     way, or 4 when none is empty; __cache_miss evicts in that case.
     Afterwards the lookup is retried from missreturn.  */
  equal = si_ceqi (etag_lo, 1);
  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
  goto missreturn;
}
 
/* Read-only lookup: identical to __cache_fetch_dirty with a dirty
   byte count of zero, so no dirty bits are updated.  */
void *
__cache_fetch (__ea void *ea)
{
  void *local_ptr = __cache_fetch_dirty (ea, 0);

  return local_ptr;
}
 
/* Placeholder entry point: it currently performs no work, and EA is
   accepted but ignored.  */
void
__cache_touch (__ea void *ea __attribute__ ((unused)))
{
}
 
/* Registered as a destructor so that it runs at program exit.  */
void __cache_flush (void) __attribute__ ((destructor));

/* Walk every set of the tag array and evict each way that is not
   marked empty.  */
void
__cache_flush (void)
{
  struct __cache_tag_array *set = __cache_tag_array;
  unsigned int set_no;
  int way;

  for (set_no = 0; set_no < CACHE_LINES / WAYS; set_no++, set++)
    {
      for (way = 0; way < WAYS; way++)
        {
          if (CHECK_EMPTY (set, way))
            continue;

          __cache_evict_entry (set, way);
        }
    }
}
/float_disf.c
0,0 → 1,31
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
/* Declaration preceding the definition below.  */
float __floatdisf (long long x);

/* Convert the signed 64-bit integer X to single precision.

   The SPU back-end now emits this conversion inline; the function is
   kept only so that objects built with earlier GCC versions, which
   still call __floatdisf, continue to link.  */
float
__floatdisf (long long x)
{
  float converted = (float) x;

  return converted;
}
/mfc_multi_tag_release.c
0,0 → 1,72
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_mfcio.h>
extern vector unsigned int __mfc_tag_table;
 
/* Release a sequential group of tags from exclusive use.  The sequential
   group of tags is the range starting from <first_tag> through
   <first_tag>+<number_of_tags>-1.  Upon successful release, MFC_DMA_TAG_VALID
   is returned and the tags become available for future reservation.

   If the specified tags were not previously reserved, no action is
   taken and MFC_DMA_TAG_INVALID is returned.

   In __mfc_tag_table a set bit marks a free tag: this function sets
   bits to release tags, and the table is left unchanged when the
   request is rejected.  */

unsigned int
__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags)
{
  vector unsigned int table_copy, tmp, tmp1;
  /* Despite its name, ONE holds all-ones in every word.  */
  vector unsigned int one = (vector unsigned int)
    { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
  vector unsigned int is_invalid;
  unsigned int last_tag;
  vector unsigned int has_been_reserved;

  /* One past the last tag of the group.  */
  last_tag = first_tag + number_of_tags;

  /* Build the mask of tags to release: clear the low number_of_tags
     bits of all-ones, rotate by -last_tag, then invert.  This leaves
     1 bits at positions first_tag .. last_tag-1, counting from the
     most significant bit.  */
  table_copy = spu_sl (one, number_of_tags);
  table_copy = spu_rl (table_copy, -last_tag);
  table_copy = spu_xor (table_copy, -1);

  /* Make sure the tags are in range and valid.  */
  tmp = spu_cmpgt (spu_promote(last_tag, 0), 32);
  tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32);
  is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31);

  /* All bits are set to 1 if invalid, 0 if valid.  */
  is_invalid = spu_or (tmp, is_invalid);
  is_invalid = spu_or (tmp1, is_invalid);

  /* check whether these tags have been reserved.  The table is shifted
     so the group starts at the top bit, then compared against all-ones
     shifted right by number_of_tags.  The comparison is true when some
     bit of the group is still set, so has_been_reserved is all-ones
     when at least one tag of the group is NOT currently reserved.  */
  tmp = spu_rlmask (one, (int)-number_of_tags);
  tmp1 = spu_sl (__mfc_tag_table, first_tag);
  has_been_reserved = spu_cmpgt(tmp1, tmp);

  is_invalid = spu_or (has_been_reserved, is_invalid);

  /* Selecting table_copy under its own mask ORs the release mask into
     the table; the second select keeps the old table when the request
     is invalid.  */
  table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy);
  __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid);

  /* Word 0 of is_invalid: 0 on success, all ones on failure.  */
  return spu_extract (is_invalid, 0);
}
 
/mfc_tag_reserve.c
0,0 → 1,51
/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
 
This file is part of GCC.
 
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
 
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
 
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_mfcio.h>
extern vector unsigned int __mfc_tag_table;
 
/* Reserves a DMA tag for exclusive use. This routine returns an available
tag id in the range 0 to 31 and marks the tag as reserved. If no tags
are available, MFC_DMA_TAG_INVALID is returned indicating that all tags
are already reserved. */
 
unsigned int
__mfc_tag_reserve (void)
{
vector unsigned int mask = (vector unsigned int)
{ 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
vector unsigned int count_zeros, is_valid;
vector signed int count_neg;
 
count_zeros = spu_cntlz (__mfc_tag_table);
count_neg = spu_sub (0, (vector signed int) count_zeros);
 
mask = spu_rlmask (mask, (vector signed int) count_neg);
__mfc_tag_table = spu_andc (__mfc_tag_table, mask);
 
is_valid = spu_cmpeq (count_zeros, 32);
count_zeros = spu_sel (count_zeros, is_valid, is_valid);
 
return spu_extract (count_zeros, 0);
}
 
/multi3.c
0,0 → 1,119
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
 
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
 
#include <spu_intrinsics.h>
 
/* Integer type declared with GCC machine mode TI.  */
typedef int TItype __attribute__ ((mode (TI)));

/* Overlay that lets the same storage be accessed either as a qword
   or as a TItype; the helpers below use it to reinterpret a value
   without an arithmetic conversion.  */
union qword_TItype
{
  qword q;
  TItype t;
};
/* Reinterpret the TItype value T as a qword by storing it through
   union qword_TItype and reading the other member.  */
inline static qword
si_from_TItype (TItype t)
{
  union qword_TItype pun = { .t = t };

  return pun.q;
}
 
/* Reinterpret the qword Q as a TItype by storing it through
   union qword_TItype and reading the other member.  */
inline static TItype
si_to_TItype (qword q)
{
  union qword_TItype pun = { .q = q };

  return pun.t;
}
 
/* Multiply two TItype values and return the product truncated to the
   width of TItype.

   A straight forward vectorization and unrolling of
 *   short l[8], r[8];
 *   TItype total = 0;
 *   for (i = 0; i < 8; i++)
 *     for (j = 0; j < 8; j++)
 *       total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j)));
 */
TItype
__multi3 (TItype l, TItype r)
{
  qword u = si_from_TItype (l);
  qword v = si_from_TItype (r);
  /* splatN replicates halfword N of V (bytes 2N and 2N+1) into every
     halfword position: si_ilh builds a shuffle pattern that repeats
     the same two byte indices.  */
  qword splat0 = si_shufb (v, v, si_ilh (0x0001));
  qword splat1 = si_shufb (v, v, si_ilh (0x0203));
  qword splat2 = si_shufb (v, v, si_ilh (0x0405));
  qword splat3 = si_shufb (v, v, si_ilh (0x0607));
  qword splat4 = si_shufb (v, v, si_ilh (0x0809));
  qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
  qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
  qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));

  /* Partial products of U with each replicated halfword of V.  The
     "l" parts come from si_mpyu and the "h" parts from si_mpyhhu;
     each is shifted left by a whole number of bytes so that it lines
     up with its weight in the 128-bit result.  There is no part0h.  */
  qword part0l = si_shlqbyi (si_mpyu   (u, splat0), 14);
  qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
  qword part1l = si_shlqbyi (si_mpyu   (u, splat1), 12);
  qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
  qword part2l = si_shlqbyi (si_mpyu   (u, splat2), 10);
  qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
  qword part3l = si_shlqbyi (si_mpyu   (u, splat3), 8);
  qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
  qword part4l = si_shlqbyi (si_mpyu   (u, splat4), 6);
  qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
  qword part5l = si_shlqbyi (si_mpyu   (u, splat5), 4);
  qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
  qword part6l = si_shlqbyi (si_mpyu   (u, splat6), 2);
  qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
  qword part7l = si_mpyu (u, splat7);

  qword carry, total0, total1, total2, total3, total4;
  qword total5, total6, total7, total8, total9, total10;
  qword total;

  /* Sum the partial products word by word as a tree of si_a adds.
     No carry is collected for the adds that form total0: part0l,
     part1h, part1l and part2h were shifted by 12 or 14 bytes, so only
     their most significant word can be non-zero, and a carry out of
     that word lies beyond the 128-bit result.  */
  total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
  total1 = si_a (part2l, part3h);
  total2 = si_a (part3l, part4h);
  total3 = si_a (part4l, part5h);
  total4 = si_a (part5l, part6h);
  total5 = si_a (part6l, part7h);
  total6 = si_a (total0, total1);
  total7 = si_a (total2, total3);
  total8 = si_a (total4, total5);
  total9 = si_a (total6, total7);
  total10 = si_a (total8, total9);

  /* For every other add above, generate the per-word carry-out with
     si_cg on the same operand pair and accumulate the carries.  The
     final 4-byte shift moves each word's carry count into the next
     more significant word.  */
  carry = si_cg (part2l, part3h);
  carry = si_a (carry, si_cg (part3l, part4h));
  carry = si_a (carry, si_cg (part4l, part5h));
  carry = si_a (carry, si_cg (part5l, part6h));
  carry = si_a (carry, si_cg (part6l, part7h));
  carry = si_a (carry, si_cg (total0, total1));
  carry = si_a (carry, si_cg (total2, total3));
  carry = si_a (carry, si_cg (total4, total5));
  carry = si_a (carry, si_cg (total6, total7));
  carry = si_a (carry, si_cg (total8, total9));
  carry = si_shlqbyi (carry, 4);

  /* Add the accumulated carries to total10.  The carry out of each
     word is generated (si_cg), moved up one word, extended with the
     incoming carry (si_cgx), moved up again, and finally consumed by
     the extended add (si_addx).  */
  total = si_cg (total10, carry);
  total = si_shlqbyi (total, 4);
  total = si_cgx (total10, carry, total);
  total = si_shlqbyi (total, 4);
  total = si_addx (total10, carry, total);
  return si_to_TItype (total);
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.