OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libgfortran/] [generated/] [matmul_c8.c] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
/* Implementation of the MATMUL intrinsic
2
   Copyright 2002, 2005 Free Software Foundation, Inc.
3
   Contributed by Paul Brook <paul@nowt.org>
4
 
5
This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
 
7
Libgfortran is free software; you can redistribute it and/or
8
modify it under the terms of the GNU General Public
9
License as published by the Free Software Foundation; either
10
version 2 of the License, or (at your option) any later version.
11
 
12
In addition to the permissions in the GNU General Public License, the
13
Free Software Foundation gives you unlimited permission to link the
14
compiled version of this file into combinations with other programs,
15
and to distribute those combinations without any restriction coming
16
from the use of this file.  (The General Public License restrictions
17
do apply in other respects; for example, they cover modification of
18
the file, and distribution when not linked into a combine
19
executable.)
20
 
21
Libgfortran is distributed in the hope that it will be useful,
22
but WITHOUT ANY WARRANTY; without even the implied warranty of
23
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
GNU General Public License for more details.
25
 
26
You should have received a copy of the GNU General Public
27
License along with libgfortran; see the file COPYING.  If not,
28
write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
29
Boston, MA 02110-1301, USA.  */
30
 
31
#include "config.h"
32
#include <stdlib.h>
33
#include <string.h>
34
#include <assert.h>
35
#include "libgfortran.h"
36
 
37
#if defined (HAVE_GFC_COMPLEX_8)
38
 
39
/* This is a C version of the following fortran pseudo-code. The key
40
   point is the loop order -- we access all arrays column-first, which
41
   improves the performance enough to boost galgel spec score by 50%.
42
 
43
   DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
44
   C = 0
45
   DO J=1,N
46
     DO K=1,COUNT
47
       DO I=1,M
48
         C(I,J) = C(I,J)+A(I,K)*B(K,J)
49
*/
50
 
51
extern void matmul_c8 (gfc_array_c8 * const restrict retarray,
52
        gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b);
53
export_proto(matmul_c8);
54
 
55
void
56
matmul_c8 (gfc_array_c8 * const restrict retarray,
57
        gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b)
58
{
59
  const GFC_COMPLEX_8 * restrict abase;
60
  const GFC_COMPLEX_8 * restrict bbase;
61
  GFC_COMPLEX_8 * restrict dest;
62
 
63
  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
64
  index_type x, y, n, count, xcount, ycount;
65
 
66
  assert (GFC_DESCRIPTOR_RANK (a) == 2
67
          || GFC_DESCRIPTOR_RANK (b) == 2);
68
 
69
/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
70
 
71
   Either A or B (but not both) can be rank 1:
72
 
73
   o One-dimensional argument A is implicitly treated as a row matrix
74
     dimensioned [1,count], so xcount=1.
75
 
76
   o One-dimensional argument B is implicitly treated as a column matrix
77
     dimensioned [count, 1], so ycount=1.
78
  */
79
 
80
  if (retarray->data == NULL)
81
    {
82
      if (GFC_DESCRIPTOR_RANK (a) == 1)
83
        {
84
          retarray->dim[0].lbound = 0;
85
          retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
86
          retarray->dim[0].stride = 1;
87
        }
88
      else if (GFC_DESCRIPTOR_RANK (b) == 1)
89
        {
90
          retarray->dim[0].lbound = 0;
91
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
92
          retarray->dim[0].stride = 1;
93
        }
94
      else
95
        {
96
          retarray->dim[0].lbound = 0;
97
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
98
          retarray->dim[0].stride = 1;
99
 
100
          retarray->dim[1].lbound = 0;
101
          retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
102
          retarray->dim[1].stride = retarray->dim[0].ubound+1;
103
        }
104
 
105
      retarray->data
106
        = internal_malloc_size (sizeof (GFC_COMPLEX_8) * size0 ((array_t *) retarray));
107
      retarray->offset = 0;
108
    }
109
 
110
  if (retarray->dim[0].stride == 0)
111
    retarray->dim[0].stride = 1;
112
 
113
  /* This prevents constifying the input arguments.  */
114
  if (a->dim[0].stride == 0)
115
    a->dim[0].stride = 1;
116
  if (b->dim[0].stride == 0)
117
    b->dim[0].stride = 1;
118
 
119
 
120
  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
121
    {
122
      /* One-dimensional result may be addressed in the code below
123
         either as a row or a column matrix. We want both cases to
124
         work. */
125
      rxstride = rystride = retarray->dim[0].stride;
126
    }
127
  else
128
    {
129
      rxstride = retarray->dim[0].stride;
130
      rystride = retarray->dim[1].stride;
131
    }
132
 
133
 
134
  if (GFC_DESCRIPTOR_RANK (a) == 1)
135
    {
136
      /* Treat it as a a row matrix A[1,count]. */
137
      axstride = a->dim[0].stride;
138
      aystride = 1;
139
 
140
      xcount = 1;
141
      count = a->dim[0].ubound + 1 - a->dim[0].lbound;
142
    }
143
  else
144
    {
145
      axstride = a->dim[0].stride;
146
      aystride = a->dim[1].stride;
147
 
148
      count = a->dim[1].ubound + 1 - a->dim[1].lbound;
149
      xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
150
    }
151
 
152
  assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
153
 
154
  if (GFC_DESCRIPTOR_RANK (b) == 1)
155
    {
156
      /* Treat it as a column matrix B[count,1] */
157
      bxstride = b->dim[0].stride;
158
 
159
      /* bystride should never be used for 1-dimensional b.
160
         in case it is we want it to cause a segfault, rather than
161
         an incorrect result. */
162
      bystride = 0xDEADBEEF;
163
      ycount = 1;
164
    }
165
  else
166
    {
167
      bxstride = b->dim[0].stride;
168
      bystride = b->dim[1].stride;
169
      ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
170
    }
171
 
172
  abase = a->data;
173
  bbase = b->data;
174
  dest = retarray->data;
175
 
176
  if (rxstride == 1 && axstride == 1 && bxstride == 1)
177
    {
178
      const GFC_COMPLEX_8 * restrict bbase_y;
179
      GFC_COMPLEX_8 * restrict dest_y;
180
      const GFC_COMPLEX_8 * restrict abase_n;
181
      GFC_COMPLEX_8 bbase_yn;
182
 
183
      if (rystride == xcount)
184
        memset (dest, 0, (sizeof (GFC_COMPLEX_8) * xcount * ycount));
185
      else
186
        {
187
          for (y = 0; y < ycount; y++)
188
            for (x = 0; x < xcount; x++)
189
              dest[x + y*rystride] = (GFC_COMPLEX_8)0;
190
        }
191
 
192
      for (y = 0; y < ycount; y++)
193
        {
194
          bbase_y = bbase + y*bystride;
195
          dest_y = dest + y*rystride;
196
          for (n = 0; n < count; n++)
197
            {
198
              abase_n = abase + n*aystride;
199
              bbase_yn = bbase_y[n];
200
              for (x = 0; x < xcount; x++)
201
                {
202
                  dest_y[x] += abase_n[x] * bbase_yn;
203
                }
204
            }
205
        }
206
    }
207
  else
208
    {
209
      for (y = 0; y < ycount; y++)
210
        for (x = 0; x < xcount; x++)
211
          dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
212
 
213
      for (y = 0; y < ycount; y++)
214
        for (n = 0; n < count; n++)
215
          for (x = 0; x < xcount; x++)
216
            /* dest[x,y] += a[x,n] * b[n,y] */
217
            dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
218
    }
219
}
220
 
221
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.