OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [libgfortran/] [generated/] [matmul_c10.c] - Blame information for rev 849

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 733 jeremybenn
/* Implementation of the MATMUL intrinsic
2
   Copyright 2002, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.
3
   Contributed by Paul Brook <paul@nowt.org>
4
 
5
This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
 
7
Libgfortran is free software; you can redistribute it and/or
8
modify it under the terms of the GNU General Public
9
License as published by the Free Software Foundation; either
10
version 3 of the License, or (at your option) any later version.
11
 
12
Libgfortran is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15
GNU General Public License for more details.
16
 
17
Under Section 7 of GPL version 3, you are granted additional
18
permissions described in the GCC Runtime Library Exception, version
19
3.1, as published by the Free Software Foundation.
20
 
21
You should have received a copy of the GNU General Public License and
22
a copy of the GCC Runtime Library Exception along with this program;
23
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24
<http://www.gnu.org/licenses/>.  */
25
 
26
#include "libgfortran.h"
27
#include <stdlib.h>
28
#include <string.h>
29
#include <assert.h>
30
 
31
 
32
#if defined (HAVE_GFC_COMPLEX_10)
33
 
34
/* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
35
   passed to us by the front-end, in which case we'll call it for large
36
   matrices.  */
37
 
38
typedef void (*blas_call)(const char *, const char *, const int *, const int *,
39
                          const int *, const GFC_COMPLEX_10 *, const GFC_COMPLEX_10 *,
40
                          const int *, const GFC_COMPLEX_10 *, const int *,
41
                          const GFC_COMPLEX_10 *, GFC_COMPLEX_10 *, const int *,
42
                          int, int);
43
 
44
/* The order of loops is different in the case of plain matrix
45
   multiplication C=MATMUL(A,B), and in the frequent special case where
46
   the argument A is the temporary result of a TRANSPOSE intrinsic:
47
   C=MATMUL(TRANSPOSE(A),B).  Transposed temporaries are detected by
48
   looking at their strides.
49
 
50
   The equivalent Fortran pseudo-code is:
51
 
52
   DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
53
   IF (.NOT.IS_TRANSPOSED(A)) THEN
54
     C = 0
55
     DO J=1,N
56
       DO K=1,COUNT
57
         DO I=1,M
58
           C(I,J) = C(I,J)+A(I,K)*B(K,J)
59
   ELSE
60
     DO J=1,N
61
       DO I=1,M
62
         S = 0
63
         DO K=1,COUNT
64
           S = S+A(I,K)*B(K,J)
65
         C(I,J) = S
66
   ENDIF
67
*/
68
 
69
/* If try_blas is set to a nonzero value, then the matmul function will
70
   see if there is a way to perform the matrix multiplication by a call
71
   to the BLAS gemm function.  */
72
 
73
extern void matmul_c10 (gfc_array_c10 * const restrict retarray,
74
        gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
75
        int blas_limit, blas_call gemm);
76
export_proto(matmul_c10);
77
 
78
void
79
matmul_c10 (gfc_array_c10 * const restrict retarray,
80
        gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
81
        int blas_limit, blas_call gemm)
82
{
83
  const GFC_COMPLEX_10 * restrict abase;
84
  const GFC_COMPLEX_10 * restrict bbase;
85
  GFC_COMPLEX_10 * restrict dest;
86
 
87
  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
88
  index_type x, y, n, count, xcount, ycount;
89
 
90
  assert (GFC_DESCRIPTOR_RANK (a) == 2
91
          || GFC_DESCRIPTOR_RANK (b) == 2);
92
 
93
/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
94
 
95
   Either A or B (but not both) can be rank 1:
96
 
97
   o One-dimensional argument A is implicitly treated as a row matrix
98
     dimensioned [1,count], so xcount=1.
99
 
100
   o One-dimensional argument B is implicitly treated as a column matrix
101
     dimensioned [count, 1], so ycount=1.
102
  */
103
 
104
  if (retarray->data == NULL)
105
    {
106
      if (GFC_DESCRIPTOR_RANK (a) == 1)
107
        {
108
          GFC_DIMENSION_SET(retarray->dim[0], 0,
109
                            GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
110
        }
111
      else if (GFC_DESCRIPTOR_RANK (b) == 1)
112
        {
113
          GFC_DIMENSION_SET(retarray->dim[0], 0,
114
                            GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
115
        }
116
      else
117
        {
118
          GFC_DIMENSION_SET(retarray->dim[0], 0,
119
                            GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
120
 
121
          GFC_DIMENSION_SET(retarray->dim[1], 0,
122
                            GFC_DESCRIPTOR_EXTENT(b,1) - 1,
123
                            GFC_DESCRIPTOR_EXTENT(retarray,0));
124
        }
125
 
126
      retarray->data
127
        = internal_malloc_size (sizeof (GFC_COMPLEX_10) * size0 ((array_t *) retarray));
128
      retarray->offset = 0;
129
    }
130
    else if (unlikely (compile_options.bounds_check))
131
      {
132
        index_type ret_extent, arg_extent;
133
 
134
        if (GFC_DESCRIPTOR_RANK (a) == 1)
135
          {
136
            arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
137
            ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
138
            if (arg_extent != ret_extent)
139
              runtime_error ("Incorrect extent in return array in"
140
                             " MATMUL intrinsic: is %ld, should be %ld",
141
                             (long int) ret_extent, (long int) arg_extent);
142
          }
143
        else if (GFC_DESCRIPTOR_RANK (b) == 1)
144
          {
145
            arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
146
            ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
147
            if (arg_extent != ret_extent)
148
              runtime_error ("Incorrect extent in return array in"
149
                             " MATMUL intrinsic: is %ld, should be %ld",
150
                             (long int) ret_extent, (long int) arg_extent);
151
          }
152
        else
153
          {
154
            arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
155
            ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
156
            if (arg_extent != ret_extent)
157
              runtime_error ("Incorrect extent in return array in"
158
                             " MATMUL intrinsic for dimension 1:"
159
                             " is %ld, should be %ld",
160
                             (long int) ret_extent, (long int) arg_extent);
161
 
162
            arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
163
            ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
164
            if (arg_extent != ret_extent)
165
              runtime_error ("Incorrect extent in return array in"
166
                             " MATMUL intrinsic for dimension 2:"
167
                             " is %ld, should be %ld",
168
                             (long int) ret_extent, (long int) arg_extent);
169
          }
170
      }
171
 
172
 
173
  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
174
    {
175
      /* One-dimensional result may be addressed in the code below
176
         either as a row or a column matrix. We want both cases to
177
         work. */
178
      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
179
    }
180
  else
181
    {
182
      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
183
      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
184
    }
185
 
186
 
187
  if (GFC_DESCRIPTOR_RANK (a) == 1)
188
    {
189
      /* Treat it as a a row matrix A[1,count]. */
190
      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
191
      aystride = 1;
192
 
193
      xcount = 1;
194
      count = GFC_DESCRIPTOR_EXTENT(a,0);
195
    }
196
  else
197
    {
198
      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
199
      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
200
 
201
      count = GFC_DESCRIPTOR_EXTENT(a,1);
202
      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
203
    }
204
 
205
  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
206
    {
207
      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
208
        runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
209
    }
210
 
211
  if (GFC_DESCRIPTOR_RANK (b) == 1)
212
    {
213
      /* Treat it as a column matrix B[count,1] */
214
      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
215
 
216
      /* bystride should never be used for 1-dimensional b.
217
         in case it is we want it to cause a segfault, rather than
218
         an incorrect result. */
219
      bystride = 0xDEADBEEF;
220
      ycount = 1;
221
    }
222
  else
223
    {
224
      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
225
      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
226
      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
227
    }
228
 
229
  abase = a->data;
230
  bbase = b->data;
231
  dest = retarray->data;
232
 
233
 
234
  /* Now that everything is set up, we're performing the multiplication
235
     itself.  */
236
 
237
#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
238
 
239
  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
240
      && (bxstride == 1 || bystride == 1)
241
      && (((float) xcount) * ((float) ycount) * ((float) count)
242
          > POW3(blas_limit)))
243
  {
244
    const int m = xcount, n = ycount, k = count, ldc = rystride;
245
    const GFC_COMPLEX_10 one = 1, zero = 0;
246
    const int lda = (axstride == 1) ? aystride : axstride,
247
              ldb = (bxstride == 1) ? bystride : bxstride;
248
 
249
    if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
250
      {
251
        assert (gemm != NULL);
252
        gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m, &n, &k,
253
              &one, abase, &lda, bbase, &ldb, &zero, dest, &ldc, 1, 1);
254
        return;
255
      }
256
  }
257
 
258
  if (rxstride == 1 && axstride == 1 && bxstride == 1)
259
    {
260
      const GFC_COMPLEX_10 * restrict bbase_y;
261
      GFC_COMPLEX_10 * restrict dest_y;
262
      const GFC_COMPLEX_10 * restrict abase_n;
263
      GFC_COMPLEX_10 bbase_yn;
264
 
265
      if (rystride == xcount)
266
        memset (dest, 0, (sizeof (GFC_COMPLEX_10) * xcount * ycount));
267
      else
268
        {
269
          for (y = 0; y < ycount; y++)
270
            for (x = 0; x < xcount; x++)
271
              dest[x + y*rystride] = (GFC_COMPLEX_10)0;
272
        }
273
 
274
      for (y = 0; y < ycount; y++)
275
        {
276
          bbase_y = bbase + y*bystride;
277
          dest_y = dest + y*rystride;
278
          for (n = 0; n < count; n++)
279
            {
280
              abase_n = abase + n*aystride;
281
              bbase_yn = bbase_y[n];
282
              for (x = 0; x < xcount; x++)
283
                {
284
                  dest_y[x] += abase_n[x] * bbase_yn;
285
                }
286
            }
287
        }
288
    }
289
  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
290
    {
291
      if (GFC_DESCRIPTOR_RANK (a) != 1)
292
        {
293
          const GFC_COMPLEX_10 *restrict abase_x;
294
          const GFC_COMPLEX_10 *restrict bbase_y;
295
          GFC_COMPLEX_10 *restrict dest_y;
296
          GFC_COMPLEX_10 s;
297
 
298
          for (y = 0; y < ycount; y++)
299
            {
300
              bbase_y = &bbase[y*bystride];
301
              dest_y = &dest[y*rystride];
302
              for (x = 0; x < xcount; x++)
303
                {
304
                  abase_x = &abase[x*axstride];
305
                  s = (GFC_COMPLEX_10) 0;
306
                  for (n = 0; n < count; n++)
307
                    s += abase_x[n] * bbase_y[n];
308
                  dest_y[x] = s;
309
                }
310
            }
311
        }
312
      else
313
        {
314
          const GFC_COMPLEX_10 *restrict bbase_y;
315
          GFC_COMPLEX_10 s;
316
 
317
          for (y = 0; y < ycount; y++)
318
            {
319
              bbase_y = &bbase[y*bystride];
320
              s = (GFC_COMPLEX_10) 0;
321
              for (n = 0; n < count; n++)
322
                s += abase[n*axstride] * bbase_y[n];
323
              dest[y*rystride] = s;
324
            }
325
        }
326
    }
327
  else if (axstride < aystride)
328
    {
329
      for (y = 0; y < ycount; y++)
330
        for (x = 0; x < xcount; x++)
331
          dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
332
 
333
      for (y = 0; y < ycount; y++)
334
        for (n = 0; n < count; n++)
335
          for (x = 0; x < xcount; x++)
336
            /* dest[x,y] += a[x,n] * b[n,y] */
337
            dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
338
    }
339
  else if (GFC_DESCRIPTOR_RANK (a) == 1)
340
    {
341
      const GFC_COMPLEX_10 *restrict bbase_y;
342
      GFC_COMPLEX_10 s;
343
 
344
      for (y = 0; y < ycount; y++)
345
        {
346
          bbase_y = &bbase[y*bystride];
347
          s = (GFC_COMPLEX_10) 0;
348
          for (n = 0; n < count; n++)
349
            s += abase[n*axstride] * bbase_y[n*bxstride];
350
          dest[y*rxstride] = s;
351
        }
352
    }
353
  else
354
    {
355
      const GFC_COMPLEX_10 *restrict abase_x;
356
      const GFC_COMPLEX_10 *restrict bbase_y;
357
      GFC_COMPLEX_10 *restrict dest_y;
358
      GFC_COMPLEX_10 s;
359
 
360
      for (y = 0; y < ycount; y++)
361
        {
362
          bbase_y = &bbase[y*bystride];
363
          dest_y = &dest[y*rystride];
364
          for (x = 0; x < xcount; x++)
365
            {
366
              abase_x = &abase[x*axstride];
367
              s = (GFC_COMPLEX_10) 0;
368
              for (n = 0; n < count; n++)
369
                s += abase_x[n*aystride] * bbase_y[n*bxstride];
370
              dest_y[x*rxstride] = s;
371
            }
372
        }
373
    }
374
}
375
 
376
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2025 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.