OpenCores
URL https://opencores.org/ocsvn/scarts/scarts/trunk

Subversion Repositories scarts

[/] [scarts/] [trunk/] [toolchain/] [scarts-gcc/] [gcc-4.1.1/] [libgfortran/] [m4/] [matmul.m4] - Blame information for rev 14

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 14 jlechner
`/* Implementation of the MATMUL intrinsic
2
   Copyright 2002, 2005 Free Software Foundation, Inc.
3
   Contributed by Paul Brook <paul@nowt.org>
4
 
5
This file is part of the GNU Fortran 95 runtime library (libgfortran).
6
 
7
Libgfortran is free software; you can redistribute it and/or
8
modify it under the terms of the GNU General Public
9
License as published by the Free Software Foundation; either
10
version 2 of the License, or (at your option) any later version.
11
 
12
In addition to the permissions in the GNU General Public License, the
13
Free Software Foundation gives you unlimited permission to link the
14
compiled version of this file into combinations with other programs,
15
and to distribute those combinations without any restriction coming
16
from the use of this file.  (The General Public License restrictions
17
do apply in other respects; for example, they cover modification of
18
the file, and distribution when not linked into a combine
19
executable.)
20
 
21
Libgfortran is distributed in the hope that it will be useful,
22
but WITHOUT ANY WARRANTY; without even the implied warranty of
23
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24
GNU General Public License for more details.
25
 
26
You should have received a copy of the GNU General Public
27
License along with libgfortran; see the file COPYING.  If not,
28
write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
29
Boston, MA 02110-1301, USA.  */
30
 
31
#include "config.h"
32
#include <stdlib.h>
33
#include <string.h>
34
#include <assert.h>
35
#include "libgfortran.h"'
36
include(iparm.m4)dnl
37
 
38
`#if defined (HAVE_'rtype_name`)'
39
 
40
/* This is a C version of the following fortran pseudo-code. The key
41
   point is the loop order -- we access all arrays column-first, which
42
   improves the performance enough to boost galgel spec score by 50%.
43
 
44
   DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
45
   C = 0
46
   DO J=1,N
47
     DO K=1,COUNT
48
       DO I=1,M
49
         C(I,J) = C(I,J)+A(I,K)*B(K,J)
50
*/
51
 
52
extern void matmul_`'rtype_code (rtype * const restrict retarray,
53
        rtype * const restrict a, rtype * const restrict b);
54
export_proto(matmul_`'rtype_code);
55
 
56
void
57
matmul_`'rtype_code (rtype * const restrict retarray,
58
        rtype * const restrict a, rtype * const restrict b)
59
{
60
  const rtype_name * restrict abase;
61
  const rtype_name * restrict bbase;
62
  rtype_name * restrict dest;
63
 
64
  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
65
  index_type x, y, n, count, xcount, ycount;
66
 
67
  assert (GFC_DESCRIPTOR_RANK (a) == 2
68
          || GFC_DESCRIPTOR_RANK (b) == 2);
69
 
70
/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
71
 
72
   Either A or B (but not both) can be rank 1:
73
 
74
   o One-dimensional argument A is implicitly treated as a row matrix
75
     dimensioned [1,count], so xcount=1.
76
 
77
   o One-dimensional argument B is implicitly treated as a column matrix
78
     dimensioned [count, 1], so ycount=1.
79
  */
80
 
81
  if (retarray->data == NULL)
82
    {
83
      if (GFC_DESCRIPTOR_RANK (a) == 1)
84
        {
85
          retarray->dim[0].lbound = 0;
86
          retarray->dim[0].ubound = b->dim[1].ubound - b->dim[1].lbound;
87
          retarray->dim[0].stride = 1;
88
        }
89
      else if (GFC_DESCRIPTOR_RANK (b) == 1)
90
        {
91
          retarray->dim[0].lbound = 0;
92
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
93
          retarray->dim[0].stride = 1;
94
        }
95
      else
96
        {
97
          retarray->dim[0].lbound = 0;
98
          retarray->dim[0].ubound = a->dim[0].ubound - a->dim[0].lbound;
99
          retarray->dim[0].stride = 1;
100
 
101
          retarray->dim[1].lbound = 0;
102
          retarray->dim[1].ubound = b->dim[1].ubound - b->dim[1].lbound;
103
          retarray->dim[1].stride = retarray->dim[0].ubound+1;
104
        }
105
 
106
      retarray->data
107
        = internal_malloc_size (sizeof (rtype_name) * size0 ((array_t *) retarray));
108
      retarray->offset = 0;
109
    }
110
 
111
  if (retarray->dim[0].stride == 0)
112
    retarray->dim[0].stride = 1;
113
 
114
  /* This prevents constifying the input arguments.  */
115
  if (a->dim[0].stride == 0)
116
    a->dim[0].stride = 1;
117
  if (b->dim[0].stride == 0)
118
    b->dim[0].stride = 1;
119
 
120
sinclude(`matmul_asm_'rtype_code`.m4')dnl
121
 
122
  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
123
    {
124
      /* One-dimensional result may be addressed in the code below
125
         either as a row or a column matrix. We want both cases to
126
         work. */
127
      rxstride = rystride = retarray->dim[0].stride;
128
    }
129
  else
130
    {
131
      rxstride = retarray->dim[0].stride;
132
      rystride = retarray->dim[1].stride;
133
    }
134
 
135
 
136
  if (GFC_DESCRIPTOR_RANK (a) == 1)
137
    {
138
      /* Treat it as a a row matrix A[1,count]. */
139
      axstride = a->dim[0].stride;
140
      aystride = 1;
141
 
142
      xcount = 1;
143
      count = a->dim[0].ubound + 1 - a->dim[0].lbound;
144
    }
145
  else
146
    {
147
      axstride = a->dim[0].stride;
148
      aystride = a->dim[1].stride;
149
 
150
      count = a->dim[1].ubound + 1 - a->dim[1].lbound;
151
      xcount = a->dim[0].ubound + 1 - a->dim[0].lbound;
152
    }
153
 
154
  assert(count == b->dim[0].ubound + 1 - b->dim[0].lbound);
155
 
156
  if (GFC_DESCRIPTOR_RANK (b) == 1)
157
    {
158
      /* Treat it as a column matrix B[count,1] */
159
      bxstride = b->dim[0].stride;
160
 
161
      /* bystride should never be used for 1-dimensional b.
162
         in case it is we want it to cause a segfault, rather than
163
         an incorrect result. */
164
      bystride = 0xDEADBEEF;
165
      ycount = 1;
166
    }
167
  else
168
    {
169
      bxstride = b->dim[0].stride;
170
      bystride = b->dim[1].stride;
171
      ycount = b->dim[1].ubound + 1 - b->dim[1].lbound;
172
    }
173
 
174
  abase = a->data;
175
  bbase = b->data;
176
  dest = retarray->data;
177
 
178
  if (rxstride == 1 && axstride == 1 && bxstride == 1)
179
    {
180
      const rtype_name * restrict bbase_y;
181
      rtype_name * restrict dest_y;
182
      const rtype_name * restrict abase_n;
183
      rtype_name bbase_yn;
184
 
185
      if (rystride == xcount)
186
        memset (dest, 0, (sizeof (rtype_name) * xcount * ycount));
187
      else
188
        {
189
          for (y = 0; y < ycount; y++)
190
            for (x = 0; x < xcount; x++)
191
              dest[x + y*rystride] = (rtype_name)0;
192
        }
193
 
194
      for (y = 0; y < ycount; y++)
195
        {
196
          bbase_y = bbase + y*bystride;
197
          dest_y = dest + y*rystride;
198
          for (n = 0; n < count; n++)
199
            {
200
              abase_n = abase + n*aystride;
201
              bbase_yn = bbase_y[n];
202
              for (x = 0; x < xcount; x++)
203
                {
204
                  dest_y[x] += abase_n[x] * bbase_yn;
205
                }
206
            }
207
        }
208
    }
209
  else
210
    {
211
      for (y = 0; y < ycount; y++)
212
        for (x = 0; x < xcount; x++)
213
          dest[x*rxstride + y*rystride] = (rtype_name)0;
214
 
215
      for (y = 0; y < ycount; y++)
216
        for (n = 0; n < count; n++)
217
          for (x = 0; x < xcount; x++)
218
            /* dest[x,y] += a[x,n] * b[n,y] */
219
            dest[x*rxstride + y*rystride] += abase[x*axstride + n*aystride] * bbase[n*bxstride + y*bystride];
220
    }
221
}
222
 
223
#endif

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.