/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
|
| 24 |
|
|
#include "system.h"
|
| 25 |
|
|
#include "coretypes.h"
|
| 26 |
|
|
#include "tm.h"
|
| 27 |
|
|
#include "ggc.h"
|
| 28 |
|
|
#include "tree.h"
|
| 29 |
|
|
#include "target.h"
|
| 30 |
|
|
#include "basic-block.h"
|
| 31 |
|
|
#include "diagnostic.h"
|
| 32 |
|
|
#include "tree-flow.h"
|
| 33 |
|
|
#include "tree-dump.h"
|
| 34 |
|
|
#include "cfgloop.h"
|
| 35 |
|
|
#include "cfglayout.h"
|
| 36 |
|
|
#include "expr.h"
|
| 37 |
|
|
#include "recog.h"
|
| 38 |
|
|
#include "optabs.h"
|
| 39 |
|
|
#include "tree-vectorizer.h"
|
| 40 |
|
|
|
| 41 |
|
|
/* Extract the location of the basic block in the source code.
|
| 42 |
|
|
Return the basic block location if succeed and NULL if not. */
|
| 43 |
|
|
|
| 44 |
|
|
LOC
|
| 45 |
|
|
find_bb_location (basic_block bb)
|
| 46 |
|
|
{
|
| 47 |
|
|
gimple stmt = NULL;
|
| 48 |
|
|
gimple_stmt_iterator si;
|
| 49 |
|
|
|
| 50 |
|
|
if (!bb)
|
| 51 |
|
|
return UNKNOWN_LOC;
|
| 52 |
|
|
|
| 53 |
|
|
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
| 54 |
|
|
{
|
| 55 |
|
|
stmt = gsi_stmt (si);
|
| 56 |
|
|
if (gimple_location (stmt) != UNKNOWN_LOC)
|
| 57 |
|
|
return gimple_location (stmt);
|
| 58 |
|
|
}
|
| 59 |
|
|
|
| 60 |
|
|
return UNKNOWN_LOC;
|
| 61 |
|
|
}
|
| 62 |
|
|
|
| 63 |
|
|
|
| 64 |
|
|
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
|
| 65 |
|
|
|
| 66 |
|
|
static void
|
| 67 |
|
|
vect_free_slp_tree (slp_tree node)
|
| 68 |
|
|
{
|
| 69 |
|
|
if (!node)
|
| 70 |
|
|
return;
|
| 71 |
|
|
|
| 72 |
|
|
if (SLP_TREE_LEFT (node))
|
| 73 |
|
|
vect_free_slp_tree (SLP_TREE_LEFT (node));
|
| 74 |
|
|
|
| 75 |
|
|
if (SLP_TREE_RIGHT (node))
|
| 76 |
|
|
vect_free_slp_tree (SLP_TREE_RIGHT (node));
|
| 77 |
|
|
|
| 78 |
|
|
VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
|
| 79 |
|
|
|
| 80 |
|
|
if (SLP_TREE_VEC_STMTS (node))
|
| 81 |
|
|
VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
|
| 82 |
|
|
|
| 83 |
|
|
free (node);
|
| 84 |
|
|
}
|
| 85 |
|
|
|
| 86 |
|
|
|
| 87 |
|
|
/* Free the memory allocated for the SLP instance: the SLP tree, the
   load-permutation index vector, and the vector of load nodes.
   NOTE(review): INSTANCE itself is not freed here — presumably the
   caller owns that allocation; confirm against the call sites.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}
|
| 96 |
|
|
|
| 97 |
|
|
|
| 98 |
|
|
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).

   LOOP_VINFO/BB_VINFO: enclosing loop / basic-block vectorization info
     (exactly one is expected to be non-NULL).
   SLP_NODE: the SLP node being built; its first scalar stmt supplies the
     stmt_vec_info used for cost modeling.
   DEF_STMTS0/DEF_STMTS1: output vectors of the defining stmts of operand
     0 / operand 1, pushed only for vect_internal_def defs.
   FIRST_STMT_DT0/DT1, FIRST_STMT_DEF0_TYPE/DEF1_TYPE,
   FIRST_STMT_CONST_OPRND: in/out record of what the first stmt of the
     group looked like; filled on the first call, compared against on
     subsequent calls.
   NCOPIES_FOR_COST: multiplier passed to the cost model.
   PATTERN0/PATTERN1: in/out flags - whether operand 0/1 defs come from a
     recognized vectorizer pattern; all stmts of the node must agree.

   Return true on success, false if the defs are unsupported or do not
   match the first stmt of the group.  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  /* Cost information is attached to the first scalar stmt of the node.  */
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;	/* RHS only */

  for (i = 0; i < number_of_oprnds; i++)
    {
      /* Operand 0 of the gimple stmt is the lhs; rhs operands start at 1.  */
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
	  || (!def_stmt && dt[i] != vect_constant_def))
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: can't find def for ");
	      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
	    }

	  return false;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern. Check that all the stmts of the node are in the
         pattern. */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          /* The !*first_stmt_dt0 test relies on vect_uninitialized_def
             being the zero enumerator - i.e. "dt0 not recorded yet".  */
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  /* A mix of in-pattern and out-of-pattern defs cannot be
                     vectorized as one SLP node.  */
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                                     " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          /* Replace the pattern stmt by the stmt the pattern was detected
             from, and use its def type instead.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          /* NOTE(review): *dt is dt[0]; when i == 1 this re-checks operand 0
             rather than the def type just stored in dt[1] - looks like dt[i]
             was intended.  TODO confirm before changing.  */
          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (vect_print_dump_info (REPORT_DETAILS))
                  fprintf (vect_dump, "unsupported defining stmt: ");
                return false;
            }
        }

      if (!*first_stmt_dt0)
	{
	  /* op0 of the first stmt of the group - store its info. */
	  *first_stmt_dt0 = dt[i];
	  if (def)
	    *first_stmt_def0_type = TREE_TYPE (def);
	  else
	    *first_stmt_const_oprnd = oprnd;

	  /* Analyze costs (for the first stmt of the group only). */
	  if (rhs_class != GIMPLE_SINGLE_RHS)
	    /* Not memory operation (we don't call this functions for loads). */
	    vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
	  else
	    /* Store. */
	    vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
	}

      else
	{
	  if (!*first_stmt_dt1 && i == 1)
	    {
	      /* op1 of the first stmt of the group - store its info. */
	      *first_stmt_dt1 = dt[i];
	      if (def)
		*first_stmt_def1_type = TREE_TYPE (def);
	      else
		{
		  /* We assume that the stmt contains only one constant
		     operand. We fail otherwise, to be on the safe side. */
		  if (*first_stmt_const_oprnd)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: two constant "
				 "oprnds in stmt");
		      return false;
		    }
		  *first_stmt_const_oprnd = oprnd;
		}
	    }
	  else
	    {
	      /* Not first stmt of the group, check that the def-stmt/s match
		 the def-stmt/s of the first stmt. */
	      if ((i == 0
		   && (*first_stmt_dt0 != dt[i]
		       || (*first_stmt_def0_type && def
			   && !types_compatible_p (*first_stmt_def0_type,
						   TREE_TYPE (def)))))
		  || (i == 1
		      && (*first_stmt_dt1 != dt[i]
			  || (*first_stmt_def1_type && def
			      && !types_compatible_p (*first_stmt_def1_type,
						      TREE_TYPE (def)))))
		  || (!def
		      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
					      TREE_TYPE (oprnd))))
		{
		  if (vect_print_dump_info (REPORT_SLP))
		    fprintf (vect_dump, "Build SLP failed: different types ");

		  return false;
		}
	    }
	}

      /* Check the types of the definitions. */
      switch (dt[i])
	{
	case vect_constant_def:
	case vect_external_def:
	  /* Constants/externals need no def stmt in the SLP tree.  */
	  break;

	case vect_internal_def:
	  /* Collect the def stmt so a child SLP node can be built for it.  */
	  if (i == 0)
	    VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
	  else
	    VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported. */
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump, "Build SLP failed: illegal type of def ");
	      print_generic_expr (vect_dump, def, TDF_SLIM);
	    }

	  return false;
	}
    }

  return true;
}
|
| 296 |
|
|
|
| 297 |
|
|
|
| 298 |
|
|
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation. Otherwise, return
   TRUE.

   NODE: in/out - on entry holds the group's scalar stmts; on success its
     LEFT/RIGHT children are filled in for internal defs.
   GROUP_SIZE: number of scalar stmts in the SLP group.
   INSIDE_COST/OUTSIDE_COST: accumulated instance costs (updated here).
   NCOPIES_FOR_COST: cost-model multiplier forwarded to the cost helpers.
   MAX_NUNITS: in/out - tracks the largest number of vector subparts seen,
     used to detect the smallest type in multiple-type cases.
   LOAD_PERMUTATION: out - for each load, its position in its
     interleaving chain is appended.
   LOADS: out - SLP nodes of loads that need permutation.
   VECTORIZATION_FACTOR: the VF used to compute NCOPIES per stmt.

   NOTE(review): on the early "return false" paths DEF_STMTS0/1 appear to
   leak - presumably freed along with the tree by the caller on failure;
   confirm against the callers before changing.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load;

  /* For every stmt in NODE find its def stmt/s. */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
	{
	  fprintf (vect_dump, "Build SLP for ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      /* Only stmts that produce a value (assigns/calls) can be packed.  */
      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_SLP))
	    {
	      fprintf (vect_dump,
		       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      /* NCOPIES > 1 means multiple vector copies per scalar stmt
         (multiple-types case).  */
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
	  if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP. */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type. */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
	rhs_code = CALL_EXPR;
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation. */
      if (i == 0)
	{
	  /* First stmt of the group: record its operation; later stmts
	     must match it.  */
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand. */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift. */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || (optab->handlers[(int) vec_mode].insn_code
		      == CODE_FOR_nothing))
		{
		  /* No vector/vector shift, try for a vector/scalar shift. */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: no optab.");
		      return false;
		    }
		  icode = (int) optab->handlers[(int) vec_mode].insn_code;
		  if (icode == CODE_FOR_nothing)
		    {
		      if (vect_print_dump_info (REPORT_SLP))
			fprintf (vect_dump, "Build SLP failed: "
				            "op not supported by target.");
		      return false;
		    }
		  /* A scalar (non-vector) second operand means every stmt in
		     the group must use the very same shift amount.  */
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	}
      else
	{
	  /* Subsequent stmts: require the same operation as the first stmt
	     (REALPART/IMAGPART are treated as interchangeable).  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different operation in stmt ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump,
			   "Build SLP failed: different shift arguments in ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }
	}

      /* Strided store or load. */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store. */
	      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
						stmt, &def_stmts0, &def_stmts1,
						&first_stmt_dt0,
						&first_stmt_dt1,
						&first_stmt_def0_type,
						&first_stmt_def1_type,
						&first_stmt_const_oprnd,
						ncopies_for_cost,
						&pattern0, &pattern1))
		return false;
	    }
	  else
	    {
	      /* Load. */
              /* FORNOW: Check that there is no gap between the loads. */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size. */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt))
                  > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));

              if (first_load == stmt)
                {
                  /* Alignment is checked on the first load of the chain
                     only - it stands for the whole interleaving group.  */
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group). */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain. In
                 case that permutation is needed we later decide if a specific
                 permutation is supported. */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads. */
              stop_recursion = true;
              continue;
           }
	} /* Strided access. */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not strided load. */
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: not strided load ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      /* FORNOW: Not strided loads are not supported. */
	      return false;
	    }

	  /* Not memory operation. */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary)
	    {
	      if (vect_print_dump_info (REPORT_SLP))
		{
		  fprintf (vect_dump, "Build SLP failed: operation");
		  fprintf (vect_dump, " unsupported ");
		  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
		}

	      return false;
	    }

	  /* Find the def-stmts. */
	  if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
					    &def_stmts0, &def_stmts1,
					    &first_stmt_dt0, &first_stmt_dt1,
					    &first_stmt_def0_type,
					    &first_stmt_def1_type,
					    &first_stmt_const_oprnd,
					    ncopies_for_cost,
					    &pattern0, &pattern1))
	    return false;
	}
    }

  /* Add the costs of the node to the overall instance costs. */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion. */
  if (stop_recursion)
    {
      if (permutation)
        {
          /* Remember this node so the required permutation can be checked
             later, and account for the permutation cost.  */
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s. */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
				inside_cost, outside_cost, ncopies_for_cost,
				max_nunits, load_permutation, loads,
				vectorization_factor))
	return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}
|
| 658 |
|
|
|
| 659 |
|
|
|
| 660 |
|
|
static void
|
| 661 |
|
|
vect_print_slp_tree (slp_tree node)
|
| 662 |
|
|
{
|
| 663 |
|
|
int i;
|
| 664 |
|
|
gimple stmt;
|
| 665 |
|
|
|
| 666 |
|
|
if (!node)
|
| 667 |
|
|
return;
|
| 668 |
|
|
|
| 669 |
|
|
fprintf (vect_dump, "node ");
|
| 670 |
|
|
for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
|
| 671 |
|
|
{
|
| 672 |
|
|
fprintf (vect_dump, "\n\tstmt %d ", i);
|
| 673 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 674 |
|
|
}
|
| 675 |
|
|
fprintf (vect_dump, "\n");
|
| 676 |
|
|
|
| 677 |
|
|
vect_print_slp_tree (SLP_TREE_LEFT (node));
|
| 678 |
|
|
vect_print_slp_tree (SLP_TREE_RIGHT (node));
|
| 679 |
|
|
}
|
| 680 |
|
|
|
| 681 |
|
|
|
| 682 |
|
|
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
|
| 683 |
|
|
If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
|
| 684 |
|
|
J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
|
| 685 |
|
|
stmts in NODE are to be marked. */
|
| 686 |
|
|
|
| 687 |
|
|
static void
|
| 688 |
|
|
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
|
| 689 |
|
|
{
|
| 690 |
|
|
int i;
|
| 691 |
|
|
gimple stmt;
|
| 692 |
|
|
|
| 693 |
|
|
if (!node)
|
| 694 |
|
|
return;
|
| 695 |
|
|
|
| 696 |
|
|
for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
|
| 697 |
|
|
if (j < 0 || i == j)
|
| 698 |
|
|
STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
|
| 699 |
|
|
|
| 700 |
|
|
vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
|
| 701 |
|
|
vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
|
| 702 |
|
|
}
|
| 703 |
|
|
|
| 704 |
|
|
|
| 705 |
|
|
/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
|
| 706 |
|
|
|
| 707 |
|
|
static void
|
| 708 |
|
|
vect_mark_slp_stmts_relevant (slp_tree node)
|
| 709 |
|
|
{
|
| 710 |
|
|
int i;
|
| 711 |
|
|
gimple stmt;
|
| 712 |
|
|
stmt_vec_info stmt_info;
|
| 713 |
|
|
|
| 714 |
|
|
if (!node)
|
| 715 |
|
|
return;
|
| 716 |
|
|
|
| 717 |
|
|
for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
|
| 718 |
|
|
{
|
| 719 |
|
|
stmt_info = vinfo_for_stmt (stmt);
|
| 720 |
|
|
gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
|
| 721 |
|
|
|| STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
|
| 722 |
|
|
STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
|
| 723 |
|
|
}
|
| 724 |
|
|
|
| 725 |
|
|
vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
|
| 726 |
|
|
vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
|
| 727 |
|
|
}
|
| 728 |
|
|
|
| 729 |
|
|
|
| 730 |
|
|
/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.
   Return true if the permutation is supported (and LOADS has been
   reordered into interleaving-chain order), false otherwise.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* Head of the interleaving chain that all load nodes must belong to.  */
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain. */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  /* Two cursors advance together: I strides over LOAD_PERMUTATION by
     GROUP_SIZE (one entry per load node - each node contributed
     GROUP_SIZE identical indices), while J walks the load nodes.  */
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain. */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                   "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      /* Place the node at its position within the chain.  */
      tmp_loads[index] = load;
    }

  /* Rebuild LOADS in sorted (chain) order and swap it in.  */
  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  /* Dry-run (analysis-only) check that the target can perform the
     permutation for this unrolling factor.  */
  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}
|
| 788 |
|
|
|
| 789 |
|
|
|
| 790 |
|
|
/* Check if the required load permutation is supported.
|
| 791 |
|
|
LOAD_PERMUTATION contains a list of indices of the loads.
|
| 792 |
|
|
In SLP this permutation is relative to the order of strided stores that are
|
| 793 |
|
|
the base of the SLP instance. */
|
| 794 |
|
|
|
| 795 |
|
|
static bool
|
| 796 |
|
|
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
|
| 797 |
|
|
VEC (int, heap) *load_permutation)
|
| 798 |
|
|
{
|
| 799 |
|
|
int i = 0, j, prev = -1, next, k;
|
| 800 |
|
|
bool supported;
|
| 801 |
|
|
sbitmap load_index;
|
| 802 |
|
|
|
| 803 |
|
|
/* FORNOW: permutations are only supported in SLP. */
|
| 804 |
|
|
if (!slp_instn)
|
| 805 |
|
|
return false;
|
| 806 |
|
|
|
| 807 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 808 |
|
|
{
|
| 809 |
|
|
fprintf (vect_dump, "Load permutation ");
|
| 810 |
|
|
for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
|
| 811 |
|
|
fprintf (vect_dump, "%d ", next);
|
| 812 |
|
|
}
|
| 813 |
|
|
|
| 814 |
|
|
/* FORNOW: the only supported permutation is 0..01..1.. of length equal to
|
| 815 |
|
|
GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
|
| 816 |
|
|
well. */
|
| 817 |
|
|
if (VEC_length (int, load_permutation)
|
| 818 |
|
|
!= (unsigned int) (group_size * group_size))
|
| 819 |
|
|
return false;
|
| 820 |
|
|
|
| 821 |
|
|
supported = true;
|
| 822 |
|
|
load_index = sbitmap_alloc (group_size);
|
| 823 |
|
|
sbitmap_zero (load_index);
|
| 824 |
|
|
for (j = 0; j < group_size; j++)
|
| 825 |
|
|
{
|
| 826 |
|
|
for (i = j * group_size, k = 0;
|
| 827 |
|
|
VEC_iterate (int, load_permutation, i, next) && k < group_size;
|
| 828 |
|
|
i++, k++)
|
| 829 |
|
|
{
|
| 830 |
|
|
if (i != j * group_size && next != prev)
|
| 831 |
|
|
{
|
| 832 |
|
|
supported = false;
|
| 833 |
|
|
break;
|
| 834 |
|
|
}
|
| 835 |
|
|
|
| 836 |
|
|
prev = next;
|
| 837 |
|
|
}
|
| 838 |
|
|
|
| 839 |
|
|
if (TEST_BIT (load_index, prev))
|
| 840 |
|
|
{
|
| 841 |
|
|
supported = false;
|
| 842 |
|
|
break;
|
| 843 |
|
|
}
|
| 844 |
|
|
|
| 845 |
|
|
SET_BIT (load_index, prev);
|
| 846 |
|
|
}
|
| 847 |
|
|
|
| 848 |
|
|
for (j = 0; j < group_size; j++)
|
| 849 |
|
|
if (!TEST_BIT (load_index, j))
|
| 850 |
|
|
return false;
|
| 851 |
|
|
|
| 852 |
|
|
sbitmap_free (load_index);
|
| 853 |
|
|
|
| 854 |
|
|
if (supported && i == group_size * group_size
|
| 855 |
|
|
&& vect_supported_slp_permutation_p (slp_instn))
|
| 856 |
|
|
return true;
|
| 857 |
|
|
|
| 858 |
|
|
return false;
|
| 859 |
|
|
}
|
| 860 |
|
|
|
| 861 |
|
|
|
| 862 |
|
|
/* Find the first load in the loop that belongs to INSTANCE.
|
| 863 |
|
|
When loads are in several SLP nodes, there can be a case in which the first
|
| 864 |
|
|
load does not appear in the first SLP node to be transformed, causing
|
| 865 |
|
|
incorrect order of statements. Since we generate all the loads together,
|
| 866 |
|
|
they must be inserted before the first load of the SLP instance and not
|
| 867 |
|
|
before the first load of the first node of the instance. */
|
| 868 |
|
|
static gimple
|
| 869 |
|
|
vect_find_first_load_in_slp_instance (slp_instance instance)
|
| 870 |
|
|
{
|
| 871 |
|
|
int i, j;
|
| 872 |
|
|
slp_tree load_node;
|
| 873 |
|
|
gimple first_load = NULL, load;
|
| 874 |
|
|
|
| 875 |
|
|
for (i = 0;
|
| 876 |
|
|
VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
|
| 877 |
|
|
i++)
|
| 878 |
|
|
for (j = 0;
|
| 879 |
|
|
VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
|
| 880 |
|
|
j++)
|
| 881 |
|
|
first_load = get_earlier_stmt (load, first_load);
|
| 882 |
|
|
|
| 883 |
|
|
return first_load;
|
| 884 |
|
|
}
|
| 885 |
|
|
|
| 886 |
|
|
|
| 887 |
|
|
/* Analyze an SLP instance starting from a group of strided stores. Call
|
| 888 |
|
|
vect_build_slp_tree to build a tree of packed stmts if possible.
|
| 889 |
|
|
Return FALSE if it's impossible to SLP any stmt in the loop. */
|
| 890 |
|
|
|
| 891 |
|
|
static bool
|
| 892 |
|
|
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
|
| 893 |
|
|
gimple stmt)
|
| 894 |
|
|
{
|
| 895 |
|
|
slp_instance new_instance;
|
| 896 |
|
|
slp_tree node = XNEW (struct _slp_tree);
|
| 897 |
|
|
unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
|
| 898 |
|
|
unsigned int unrolling_factor = 1, nunits;
|
| 899 |
|
|
tree vectype, scalar_type;
|
| 900 |
|
|
gimple next;
|
| 901 |
|
|
unsigned int vectorization_factor = 0;
|
| 902 |
|
|
int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
|
| 903 |
|
|
unsigned int max_nunits = 0;
|
| 904 |
|
|
VEC (int, heap) *load_permutation;
|
| 905 |
|
|
VEC (slp_tree, heap) *loads;
|
| 906 |
|
|
|
| 907 |
|
|
scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (
|
| 908 |
|
|
vinfo_for_stmt (stmt))));
|
| 909 |
|
|
vectype = get_vectype_for_scalar_type (scalar_type);
|
| 910 |
|
|
if (!vectype)
|
| 911 |
|
|
{
|
| 912 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 913 |
|
|
{
|
| 914 |
|
|
fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
|
| 915 |
|
|
print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
|
| 916 |
|
|
}
|
| 917 |
|
|
return false;
|
| 918 |
|
|
}
|
| 919 |
|
|
|
| 920 |
|
|
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
| 921 |
|
|
if (loop_vinfo)
|
| 922 |
|
|
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
| 923 |
|
|
else
|
| 924 |
|
|
/* No multitypes in BB SLP. */
|
| 925 |
|
|
vectorization_factor = nunits;
|
| 926 |
|
|
|
| 927 |
|
|
/* Calculate the unrolling factor. */
|
| 928 |
|
|
unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
|
| 929 |
|
|
if (unrolling_factor != 1 && !loop_vinfo)
|
| 930 |
|
|
{
|
| 931 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 932 |
|
|
fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
|
| 933 |
|
|
" block SLP");
|
| 934 |
|
|
|
| 935 |
|
|
return false;
|
| 936 |
|
|
}
|
| 937 |
|
|
|
| 938 |
|
|
/* Create a node (a root of the SLP tree) for the packed strided stores. */
|
| 939 |
|
|
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
|
| 940 |
|
|
next = stmt;
|
| 941 |
|
|
/* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
|
| 942 |
|
|
while (next)
|
| 943 |
|
|
{
|
| 944 |
|
|
VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
|
| 945 |
|
|
next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
|
| 946 |
|
|
}
|
| 947 |
|
|
|
| 948 |
|
|
SLP_TREE_VEC_STMTS (node) = NULL;
|
| 949 |
|
|
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
|
| 950 |
|
|
SLP_TREE_LEFT (node) = NULL;
|
| 951 |
|
|
SLP_TREE_RIGHT (node) = NULL;
|
| 952 |
|
|
SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
|
| 953 |
|
|
SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
|
| 954 |
|
|
|
| 955 |
|
|
/* Calculate the number of vector stmts to create based on the unrolling
|
| 956 |
|
|
factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
|
| 957 |
|
|
GROUP_SIZE / NUNITS otherwise. */
|
| 958 |
|
|
ncopies_for_cost = unrolling_factor * group_size / nunits;
|
| 959 |
|
|
|
| 960 |
|
|
load_permutation = VEC_alloc (int, heap, group_size * group_size);
|
| 961 |
|
|
loads = VEC_alloc (slp_tree, heap, group_size);
|
| 962 |
|
|
|
| 963 |
|
|
/* Build the tree for the SLP instance. */
|
| 964 |
|
|
if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
|
| 965 |
|
|
&inside_cost, &outside_cost, ncopies_for_cost,
|
| 966 |
|
|
&max_nunits, &load_permutation, &loads,
|
| 967 |
|
|
vectorization_factor))
|
| 968 |
|
|
{
|
| 969 |
|
|
/* Create a new SLP instance. */
|
| 970 |
|
|
new_instance = XNEW (struct _slp_instance);
|
| 971 |
|
|
SLP_INSTANCE_TREE (new_instance) = node;
|
| 972 |
|
|
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
|
| 973 |
|
|
/* Calculate the unrolling factor based on the smallest type in the
|
| 974 |
|
|
loop. */
|
| 975 |
|
|
if (max_nunits > nunits)
|
| 976 |
|
|
unrolling_factor = least_common_multiple (max_nunits, group_size)
|
| 977 |
|
|
/ group_size;
|
| 978 |
|
|
|
| 979 |
|
|
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
| 980 |
|
|
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
|
| 981 |
|
|
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
|
| 982 |
|
|
SLP_INSTANCE_LOADS (new_instance) = loads;
|
| 983 |
|
|
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
|
| 984 |
|
|
SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
|
| 985 |
|
|
if (VEC_length (slp_tree, loads))
|
| 986 |
|
|
{
|
| 987 |
|
|
if (!vect_supported_load_permutation_p (new_instance, group_size,
|
| 988 |
|
|
load_permutation))
|
| 989 |
|
|
{
|
| 990 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 991 |
|
|
{
|
| 992 |
|
|
fprintf (vect_dump, "Build SLP failed: unsupported load "
|
| 993 |
|
|
"permutation ");
|
| 994 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 995 |
|
|
}
|
| 996 |
|
|
|
| 997 |
|
|
vect_free_slp_instance (new_instance);
|
| 998 |
|
|
return false;
|
| 999 |
|
|
}
|
| 1000 |
|
|
|
| 1001 |
|
|
SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
|
| 1002 |
|
|
= vect_find_first_load_in_slp_instance (new_instance);
|
| 1003 |
|
|
}
|
| 1004 |
|
|
else
|
| 1005 |
|
|
VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
|
| 1006 |
|
|
|
| 1007 |
|
|
if (loop_vinfo)
|
| 1008 |
|
|
VEC_safe_push (slp_instance, heap,
|
| 1009 |
|
|
LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
|
| 1010 |
|
|
new_instance);
|
| 1011 |
|
|
else
|
| 1012 |
|
|
VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
|
| 1013 |
|
|
new_instance);
|
| 1014 |
|
|
|
| 1015 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1016 |
|
|
vect_print_slp_tree (node);
|
| 1017 |
|
|
|
| 1018 |
|
|
return true;
|
| 1019 |
|
|
}
|
| 1020 |
|
|
|
| 1021 |
|
|
/* Failed to SLP. */
|
| 1022 |
|
|
/* Free the allocated memory. */
|
| 1023 |
|
|
vect_free_slp_tree (node);
|
| 1024 |
|
|
VEC_free (int, heap, load_permutation);
|
| 1025 |
|
|
VEC_free (slp_tree, heap, loads);
|
| 1026 |
|
|
|
| 1027 |
|
|
return false;
|
| 1028 |
|
|
}
|
| 1029 |
|
|
|
| 1030 |
|
|
|
| 1031 |
|
|
/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
|
| 1032 |
|
|
trees of packed scalar stmts if SLP is possible. */
|
| 1033 |
|
|
|
| 1034 |
|
|
bool
|
| 1035 |
|
|
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
| 1036 |
|
|
{
|
| 1037 |
|
|
unsigned int i;
|
| 1038 |
|
|
VEC (gimple, heap) *strided_stores;
|
| 1039 |
|
|
gimple store;
|
| 1040 |
|
|
bool ok = false;
|
| 1041 |
|
|
|
| 1042 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1043 |
|
|
fprintf (vect_dump, "=== vect_analyze_slp ===");
|
| 1044 |
|
|
|
| 1045 |
|
|
if (loop_vinfo)
|
| 1046 |
|
|
strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
|
| 1047 |
|
|
else
|
| 1048 |
|
|
strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
|
| 1049 |
|
|
|
| 1050 |
|
|
for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
|
| 1051 |
|
|
if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
|
| 1052 |
|
|
ok = true;
|
| 1053 |
|
|
|
| 1054 |
|
|
if (bb_vinfo && !ok)
|
| 1055 |
|
|
{
|
| 1056 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1057 |
|
|
fprintf (vect_dump, "Failed to SLP the basic block.");
|
| 1058 |
|
|
|
| 1059 |
|
|
return false;
|
| 1060 |
|
|
}
|
| 1061 |
|
|
|
| 1062 |
|
|
return true;
|
| 1063 |
|
|
}
|
| 1064 |
|
|
|
| 1065 |
|
|
|
| 1066 |
|
|
/* For each possible SLP instance decide whether to SLP it and calculate overall
|
| 1067 |
|
|
unrolling factor needed to SLP the loop. */
|
| 1068 |
|
|
|
| 1069 |
|
|
void
|
| 1070 |
|
|
vect_make_slp_decision (loop_vec_info loop_vinfo)
|
| 1071 |
|
|
{
|
| 1072 |
|
|
unsigned int i, unrolling_factor = 1;
|
| 1073 |
|
|
VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
|
| 1074 |
|
|
slp_instance instance;
|
| 1075 |
|
|
int decided_to_slp = 0;
|
| 1076 |
|
|
|
| 1077 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1078 |
|
|
fprintf (vect_dump, "=== vect_make_slp_decision ===");
|
| 1079 |
|
|
|
| 1080 |
|
|
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
|
| 1081 |
|
|
{
|
| 1082 |
|
|
/* FORNOW: SLP if you can. */
|
| 1083 |
|
|
if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
|
| 1084 |
|
|
unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
| 1085 |
|
|
|
| 1086 |
|
|
/* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
|
| 1087 |
|
|
call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
|
| 1088 |
|
|
loop-based vectorization. Such stmts will be marked as HYBRID. */
|
| 1089 |
|
|
vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
|
| 1090 |
|
|
decided_to_slp++;
|
| 1091 |
|
|
}
|
| 1092 |
|
|
|
| 1093 |
|
|
LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
|
| 1094 |
|
|
|
| 1095 |
|
|
if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
|
| 1096 |
|
|
fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
|
| 1097 |
|
|
decided_to_slp, unrolling_factor);
|
| 1098 |
|
|
}
|
| 1099 |
|
|
|
| 1100 |
|
|
|
| 1101 |
|
|
/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  /* For each pure-SLP scalar stmt in NODE whose result is an SSA name, scan
     its immediate uses.  If some user is a relevant non-SLP stmt (or a
     vectorizable cycle def), the def must also be produced by the loop-based
     vectorizer, so demote the stmts of NODE at position I to HYBRID.  */
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        /* USE_STMT may lack a stmt_vec_info (e.g. stmts outside the
           analyzed region); skip those.  */
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))))
          vect_mark_slp_stmts (node, hybrid, i);

  /* Recurse into both operand subtrees.  */
  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
|
| 1129 |
|
|
|
| 1130 |
|
|
|
| 1131 |
|
|
/* Find stmts that must be both vectorized and SLPed. */
|
| 1132 |
|
|
|
| 1133 |
|
|
void
|
| 1134 |
|
|
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
|
| 1135 |
|
|
{
|
| 1136 |
|
|
unsigned int i;
|
| 1137 |
|
|
VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
|
| 1138 |
|
|
slp_instance instance;
|
| 1139 |
|
|
|
| 1140 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1141 |
|
|
fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
|
| 1142 |
|
|
|
| 1143 |
|
|
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
|
| 1144 |
|
|
vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
|
| 1145 |
|
|
}
|
| 1146 |
|
|
|
| 1147 |
|
|
|
| 1148 |
|
|
/* Create and initialize a new bb_vec_info struct for BB, as well as
|
| 1149 |
|
|
stmt_vec_info structs for all the stmts in it. */
|
| 1150 |
|
|
|
| 1151 |
|
|
static bb_vec_info
|
| 1152 |
|
|
new_bb_vec_info (basic_block bb)
|
| 1153 |
|
|
{
|
| 1154 |
|
|
bb_vec_info res = NULL;
|
| 1155 |
|
|
gimple_stmt_iterator gsi;
|
| 1156 |
|
|
|
| 1157 |
|
|
res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
|
| 1158 |
|
|
BB_VINFO_BB (res) = bb;
|
| 1159 |
|
|
|
| 1160 |
|
|
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
| 1161 |
|
|
{
|
| 1162 |
|
|
gimple stmt = gsi_stmt (gsi);
|
| 1163 |
|
|
gimple_set_uid (stmt, 0);
|
| 1164 |
|
|
set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
|
| 1165 |
|
|
}
|
| 1166 |
|
|
|
| 1167 |
|
|
BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
|
| 1168 |
|
|
BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
|
| 1169 |
|
|
|
| 1170 |
|
|
bb->aux = res;
|
| 1171 |
|
|
return res;
|
| 1172 |
|
|
}
|
| 1173 |
|
|
|
| 1174 |
|
|
|
| 1175 |
|
|
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
|
| 1176 |
|
|
stmts in the basic block. */
|
| 1177 |
|
|
|
| 1178 |
|
|
static void
|
| 1179 |
|
|
destroy_bb_vec_info (bb_vec_info bb_vinfo)
|
| 1180 |
|
|
{
|
| 1181 |
|
|
basic_block bb;
|
| 1182 |
|
|
gimple_stmt_iterator si;
|
| 1183 |
|
|
|
| 1184 |
|
|
if (!bb_vinfo)
|
| 1185 |
|
|
return;
|
| 1186 |
|
|
|
| 1187 |
|
|
bb = BB_VINFO_BB (bb_vinfo);
|
| 1188 |
|
|
|
| 1189 |
|
|
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
| 1190 |
|
|
{
|
| 1191 |
|
|
gimple stmt = gsi_stmt (si);
|
| 1192 |
|
|
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
| 1193 |
|
|
|
| 1194 |
|
|
if (stmt_info)
|
| 1195 |
|
|
/* Free stmt_vec_info. */
|
| 1196 |
|
|
free_stmt_vec_info (stmt);
|
| 1197 |
|
|
}
|
| 1198 |
|
|
|
| 1199 |
|
|
VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
|
| 1200 |
|
|
VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
|
| 1201 |
|
|
free (bb_vinfo);
|
| 1202 |
|
|
bb->aux = NULL;
|
| 1203 |
|
|
}
|
| 1204 |
|
|
|
| 1205 |
|
|
|
| 1206 |
|
|
/* Analyze statements contained in SLP tree node after recursively analyzing
|
| 1207 |
|
|
the subtree. Return TRUE if the operations are supported. */
|
| 1208 |
|
|
|
| 1209 |
|
|
static bool
|
| 1210 |
|
|
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
|
| 1211 |
|
|
{
|
| 1212 |
|
|
bool dummy;
|
| 1213 |
|
|
int i;
|
| 1214 |
|
|
gimple stmt;
|
| 1215 |
|
|
|
| 1216 |
|
|
if (!node)
|
| 1217 |
|
|
return true;
|
| 1218 |
|
|
|
| 1219 |
|
|
if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
|
| 1220 |
|
|
|| !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
|
| 1221 |
|
|
return false;
|
| 1222 |
|
|
|
| 1223 |
|
|
for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
|
| 1224 |
|
|
{
|
| 1225 |
|
|
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
| 1226 |
|
|
gcc_assert (stmt_info);
|
| 1227 |
|
|
gcc_assert (PURE_SLP_STMT (stmt_info));
|
| 1228 |
|
|
|
| 1229 |
|
|
if (!vect_analyze_stmt (stmt, &dummy, node))
|
| 1230 |
|
|
return false;
|
| 1231 |
|
|
}
|
| 1232 |
|
|
|
| 1233 |
|
|
return true;
|
| 1234 |
|
|
}
|
| 1235 |
|
|
|
| 1236 |
|
|
|
| 1237 |
|
|
/* Analyze statements in SLP instances of the basic block. Return TRUE if the
|
| 1238 |
|
|
operations are supported. */
|
| 1239 |
|
|
|
| 1240 |
|
|
static bool
|
| 1241 |
|
|
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
|
| 1242 |
|
|
{
|
| 1243 |
|
|
VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
|
| 1244 |
|
|
slp_instance instance;
|
| 1245 |
|
|
int i;
|
| 1246 |
|
|
|
| 1247 |
|
|
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
|
| 1248 |
|
|
{
|
| 1249 |
|
|
if (!vect_slp_analyze_node_operations (bb_vinfo,
|
| 1250 |
|
|
SLP_INSTANCE_TREE (instance)))
|
| 1251 |
|
|
{
|
| 1252 |
|
|
vect_free_slp_instance (instance);
|
| 1253 |
|
|
VEC_ordered_remove (slp_instance, slp_instances, i);
|
| 1254 |
|
|
}
|
| 1255 |
|
|
else
|
| 1256 |
|
|
i++;
|
| 1257 |
|
|
}
|
| 1258 |
|
|
|
| 1259 |
|
|
if (!VEC_length (slp_instance, slp_instances))
|
| 1260 |
|
|
return false;
|
| 1261 |
|
|
|
| 1262 |
|
|
return true;
|
| 1263 |
|
|
}
|
| 1264 |
|
|
|
| 1265 |
|
|
|
| 1266 |
|
|
/* Check if the basic block can be vectorized.  Runs the data-reference and
   SLP analysis phases in order; any failing phase destroys the bb_vec_info
   and returns NULL.  On success the bb_vec_info (also stored in bb->aux) is
   returned.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  /* Count the "real" statements: debug stmts, nops and labels don't count
     against the size limit below.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  /* Bail out early on oversized blocks to bound compile time.  */
  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  /* Phase 1: find and classify the data references in the block.  */
  if (!vect_analyze_data_refs (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Without at least one data dependence relation there is nothing for BB
     SLP to work on.  */
  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 2: alignment analysis.  */
  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 3: dependence analysis.  */
  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence in basic"
                            " block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 4: access-pattern analysis (finds the strided store groups that
     seed SLP instances).  */
  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Phase 5: verify that the target supports the required alignments.  */
  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  /* Phase 6: per-statement operation support check.  */
  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
|
| 1400 |
|
|
|
| 1401 |
|
|
|
| 1402 |
|
|
/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
|
| 1403 |
|
|
the number of created vector stmts depends on the unrolling factor). However,
|
| 1404 |
|
|
the actual number of vector stmts for every SLP node depends on VF which is
|
| 1405 |
|
|
set later in vect_analyze_operations(). Hence, SLP costs should be updated.
|
| 1406 |
|
|
In this function we assume that the inside costs calculated in
|
| 1407 |
|
|
vect_model_xxx_cost are linear in ncopies. */
|
| 1408 |
|
|
|
| 1409 |
|
|
void
|
| 1410 |
|
|
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
|
| 1411 |
|
|
{
|
| 1412 |
|
|
unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
| 1413 |
|
|
VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
|
| 1414 |
|
|
slp_instance instance;
|
| 1415 |
|
|
|
| 1416 |
|
|
if (vect_print_dump_info (REPORT_SLP))
|
| 1417 |
|
|
fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
|
| 1418 |
|
|
|
| 1419 |
|
|
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
|
| 1420 |
|
|
/* We assume that costs are linear in ncopies. */
|
| 1421 |
|
|
SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
|
| 1422 |
|
|
/ SLP_INSTANCE_UNROLLING_FACTOR (instance);
|
| 1423 |
|
|
}
|
| 1424 |
|
|
|
| 1425 |
|
|
|
| 1426 |
|
|
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.  */

static void
vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  /* Accumulates the scalar operands of the vector currently being built
     (a tree_cons chain, consumed by build_vector / the constructor).  */
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree op, vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;

  /* A data ref on the first stmt means this is a store group: the invariant
     operand is the stored value (rhs1).  Otherwise pick operand OP_NUM of
     the scalar operation.  */
  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    {
      is_store = false;
      op = gimple_op (stmt, op_num + 1);
    }

  /* Constants get a VECTOR_CST; loop invariants get a CONSTRUCTOR.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);

  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the group backwards (I is unsigned: when it would go below
         zero it wraps and VEC_iterate stops) because the tree_cons chain
         prepends, so the vectors come out in forward element order.  */
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          /* A full vector worth of operands has been collected -- emit it.  */
          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type, NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
        VEC_quick_push (tree, *vec_oprnds, vop);
    }
}
|
| 1540 |
|
|
|
| 1541 |
|
|
|
| 1542 |
|
|
/* Get vectorized definitions from SLP_NODE that contains corresponding
|
| 1543 |
|
|
vectorized def-stmts. */
|
| 1544 |
|
|
|
| 1545 |
|
|
static void
|
| 1546 |
|
|
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
|
| 1547 |
|
|
{
|
| 1548 |
|
|
tree vec_oprnd;
|
| 1549 |
|
|
gimple vec_def_stmt;
|
| 1550 |
|
|
unsigned int i;
|
| 1551 |
|
|
|
| 1552 |
|
|
gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
|
| 1553 |
|
|
|
| 1554 |
|
|
for (i = 0;
|
| 1555 |
|
|
VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
|
| 1556 |
|
|
i++)
|
| 1557 |
|
|
{
|
| 1558 |
|
|
gcc_assert (vec_def_stmt);
|
| 1559 |
|
|
vec_oprnd = gimple_get_lhs (vec_def_stmt);
|
| 1560 |
|
|
VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
|
| 1561 |
|
|
}
|
| 1562 |
|
|
}
|
| 1563 |
|
|
|
| 1564 |
|
|
|
| 1565 |
|
|
/* Get vectorized definitions for SLP_NODE.
|
| 1566 |
|
|
If the scalar definitions are loop invariants or constants, collect them and
|
| 1567 |
|
|
call vect_get_constant_vectors() to create vector stmts.
|
| 1568 |
|
|
Otherwise, the def-stmts must be already vectorized and the vectorized stmts
|
| 1569 |
|
|
must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
|
| 1570 |
|
|
vect_get_slp_vect_defs() to retrieve them.
|
| 1571 |
|
|
If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
|
| 1572 |
|
|
the right node. This is used when the second operand must remain scalar. */
|
| 1573 |
|
|
|
| 1574 |
|
|
void
|
| 1575 |
|
|
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
|
| 1576 |
|
|
VEC (tree,heap) **vec_oprnds1)
|
| 1577 |
|
|
{
|
| 1578 |
|
|
gimple first_stmt;
|
| 1579 |
|
|
enum tree_code code;
|
| 1580 |
|
|
int number_of_vects;
|
| 1581 |
|
|
HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
|
| 1582 |
|
|
|
| 1583 |
|
|
first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
|
| 1584 |
|
|
/* The number of vector defs is determined by the number of vector statements
|
| 1585 |
|
|
in the node from which we get those statements. */
|
| 1586 |
|
|
if (SLP_TREE_LEFT (slp_node))
|
| 1587 |
|
|
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
|
| 1588 |
|
|
else
|
| 1589 |
|
|
{
|
| 1590 |
|
|
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
| 1591 |
|
|
/* Number of vector stmts was calculated according to LHS in
|
| 1592 |
|
|
vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
|
| 1593 |
|
|
necessary. See vect_get_smallest_scalar_type() for details. */
|
| 1594 |
|
|
vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
|
| 1595 |
|
|
&rhs_size_unit);
|
| 1596 |
|
|
if (rhs_size_unit != lhs_size_unit)
|
| 1597 |
|
|
{
|
| 1598 |
|
|
number_of_vects *= rhs_size_unit;
|
| 1599 |
|
|
number_of_vects /= lhs_size_unit;
|
| 1600 |
|
|
}
|
| 1601 |
|
|
}
|
| 1602 |
|
|
|
| 1603 |
|
|
/* Allocate memory for vectorized defs. */
|
| 1604 |
|
|
*vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
|
| 1605 |
|
|
|
| 1606 |
|
|
/* SLP_NODE corresponds either to a group of stores or to a group of
|
| 1607 |
|
|
unary/binary operations. We don't call this function for loads. */
|
| 1608 |
|
|
if (SLP_TREE_LEFT (slp_node))
|
| 1609 |
|
|
/* The defs are already vectorized. */
|
| 1610 |
|
|
vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
|
| 1611 |
|
|
else
|
| 1612 |
|
|
/* Build vectors from scalar defs. */
|
| 1613 |
|
|
vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
|
| 1614 |
|
|
|
| 1615 |
|
|
if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
|
| 1616 |
|
|
/* Since we don't call this function with loads, this is a group of
|
| 1617 |
|
|
stores. */
|
| 1618 |
|
|
return;
|
| 1619 |
|
|
|
| 1620 |
|
|
code = gimple_assign_rhs_code (first_stmt);
|
| 1621 |
|
|
if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
|
| 1622 |
|
|
return;
|
| 1623 |
|
|
|
| 1624 |
|
|
/* The number of vector defs is determined by the number of vector statements
|
| 1625 |
|
|
in the node from which we get those statements. */
|
| 1626 |
|
|
if (SLP_TREE_RIGHT (slp_node))
|
| 1627 |
|
|
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
|
| 1628 |
|
|
else
|
| 1629 |
|
|
number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
|
| 1630 |
|
|
|
| 1631 |
|
|
*vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
|
| 1632 |
|
|
|
| 1633 |
|
|
if (SLP_TREE_RIGHT (slp_node))
|
| 1634 |
|
|
/* The defs are already vectorized. */
|
| 1635 |
|
|
vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
|
| 1636 |
|
|
else
|
| 1637 |
|
|
/* Build vectors from scalar defs. */
|
| 1638 |
|
|
vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
|
| 1639 |
|
|
}
|
| 1640 |
|
|
|
| 1641 |
|
|
|
| 1642 |
|
|
/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
|
| 1643 |
|
|
building a vector of type MASK_TYPE from it) and two input vectors placed in
|
| 1644 |
|
|
DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
|
| 1645 |
|
|
shifting by STRIDE elements of DR_CHAIN for every copy.
|
| 1646 |
|
|
(STRIDE is the number of vectorized stmts for NODE divided by the number of
|
| 1647 |
|
|
copies).
|
| 1648 |
|
|
VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
|
| 1649 |
|
|
the created stmts must be inserted. */
|
| 1650 |
|
|
|
| 1651 |
|
|
static inline void
|
| 1652 |
|
|
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
|
| 1653 |
|
|
tree mask, int first_vec_indx, int second_vec_indx,
|
| 1654 |
|
|
gimple_stmt_iterator *gsi, slp_tree node,
|
| 1655 |
|
|
tree builtin_decl, tree vectype,
|
| 1656 |
|
|
VEC(tree,heap) *dr_chain,
|
| 1657 |
|
|
int ncopies, int vect_stmts_counter)
|
| 1658 |
|
|
{
|
| 1659 |
|
|
tree perm_dest;
|
| 1660 |
|
|
gimple perm_stmt = NULL;
|
| 1661 |
|
|
stmt_vec_info next_stmt_info;
|
| 1662 |
|
|
int i, stride;
|
| 1663 |
|
|
tree first_vec, second_vec, data_ref;
|
| 1664 |
|
|
VEC (tree, heap) *params = NULL;
|
| 1665 |
|
|
|
| 1666 |
|
|
stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
|
| 1667 |
|
|
|
| 1668 |
|
|
/* Initialize the vect stmts of NODE to properly insert the generated
|
| 1669 |
|
|
stmts later. */
|
| 1670 |
|
|
for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
|
| 1671 |
|
|
i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
|
| 1672 |
|
|
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
|
| 1673 |
|
|
|
| 1674 |
|
|
perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
|
| 1675 |
|
|
for (i = 0; i < ncopies; i++)
|
| 1676 |
|
|
{
|
| 1677 |
|
|
first_vec = VEC_index (tree, dr_chain, first_vec_indx);
|
| 1678 |
|
|
second_vec = VEC_index (tree, dr_chain, second_vec_indx);
|
| 1679 |
|
|
|
| 1680 |
|
|
/* Build argument list for the vectorized call. */
|
| 1681 |
|
|
VEC_free (tree, heap, params);
|
| 1682 |
|
|
params = VEC_alloc (tree, heap, 3);
|
| 1683 |
|
|
VEC_quick_push (tree, params, first_vec);
|
| 1684 |
|
|
VEC_quick_push (tree, params, second_vec);
|
| 1685 |
|
|
VEC_quick_push (tree, params, mask);
|
| 1686 |
|
|
|
| 1687 |
|
|
/* Generate the permute statement. */
|
| 1688 |
|
|
perm_stmt = gimple_build_call_vec (builtin_decl, params);
|
| 1689 |
|
|
data_ref = make_ssa_name (perm_dest, perm_stmt);
|
| 1690 |
|
|
gimple_call_set_lhs (perm_stmt, data_ref);
|
| 1691 |
|
|
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
|
| 1692 |
|
|
|
| 1693 |
|
|
/* Store the vector statement in NODE. */
|
| 1694 |
|
|
VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
|
| 1695 |
|
|
stride * i + vect_stmts_counter, perm_stmt);
|
| 1696 |
|
|
|
| 1697 |
|
|
first_vec_indx += stride;
|
| 1698 |
|
|
second_vec_indx += stride;
|
| 1699 |
|
|
}
|
| 1700 |
|
|
|
| 1701 |
|
|
/* Mark the scalar stmt as vectorized. */
|
| 1702 |
|
|
next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
|
| 1703 |
|
|
STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
|
| 1704 |
|
|
}
|
| 1705 |
|
|
|
| 1706 |
|
|
|
| 1707 |
|
|
/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
   the next vector, i.e., the current first vector is not needed.

   NOTE(review): this function keeps hidden state in three function-local
   static variables that persist ACROSS calls; callers must invoke it for
   the mask elements of one mask in order (INDEX 0 .. MASK_NUNITS-1) -- the
   state is only reset when the last element of a mask is processed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  /* Cross-call state: how many times the mask values have been shifted
     down because we moved past an input vector (1 == no shift yet).  */
  static int number_of_mask_fixes = 1;
  /* Cross-call state: true once the mask for the current group has been
     adjusted to drop the first input vector.  */
  static bool mask_fixed = false;
  /* Cross-call state: true if some element of the current mask selects
     from the first input vector.  */
  static bool needs_first_vector = false;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector. In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      /* Also retroactively fix the mask elements already collected for
         this mask.  */
      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      (number_of_mask_fixes)++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask. Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}
|
| 1787 |
|
|
|
| 1788 |
|
|
|
| 1789 |
|
|
/* Generate vector permute statements from a list of loads in DR_CHAIN.
|
| 1790 |
|
|
If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
|
| 1791 |
|
|
permute statements for SLP_NODE_INSTANCE. */
|
| 1792 |
|
|
bool
|
| 1793 |
|
|
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
|
| 1794 |
|
|
gimple_stmt_iterator *gsi, int vf,
|
| 1795 |
|
|
slp_instance slp_node_instance, bool analyze_only)
|
| 1796 |
|
|
{
|
| 1797 |
|
|
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
| 1798 |
|
|
tree mask_element_type = NULL_TREE, mask_type;
|
| 1799 |
|
|
int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
|
| 1800 |
|
|
slp_tree node;
|
| 1801 |
|
|
tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
|
| 1802 |
|
|
gimple next_scalar_stmt;
|
| 1803 |
|
|
int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
|
| 1804 |
|
|
int first_mask_element;
|
| 1805 |
|
|
int index, unroll_factor, *mask, current_mask_element, ncopies;
|
| 1806 |
|
|
bool only_one_vec = false, need_next_vector = false;
|
| 1807 |
|
|
int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
|
| 1808 |
|
|
|
| 1809 |
|
|
if (!targetm.vectorize.builtin_vec_perm)
|
| 1810 |
|
|
{
|
| 1811 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 1812 |
|
|
{
|
| 1813 |
|
|
fprintf (vect_dump, "no builtin for vect permute for ");
|
| 1814 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 1815 |
|
|
}
|
| 1816 |
|
|
|
| 1817 |
|
|
return false;
|
| 1818 |
|
|
}
|
| 1819 |
|
|
|
| 1820 |
|
|
builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
|
| 1821 |
|
|
&mask_element_type);
|
| 1822 |
|
|
if (!builtin_decl || !mask_element_type)
|
| 1823 |
|
|
{
|
| 1824 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 1825 |
|
|
{
|
| 1826 |
|
|
fprintf (vect_dump, "no builtin for vect permute for ");
|
| 1827 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 1828 |
|
|
}
|
| 1829 |
|
|
|
| 1830 |
|
|
return false;
|
| 1831 |
|
|
}
|
| 1832 |
|
|
|
| 1833 |
|
|
mask_type = get_vectype_for_scalar_type (mask_element_type);
|
| 1834 |
|
|
mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
|
| 1835 |
|
|
mask = (int *) xmalloc (sizeof (int) * mask_nunits);
|
| 1836 |
|
|
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
| 1837 |
|
|
scale = mask_nunits / nunits;
|
| 1838 |
|
|
unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
| 1839 |
|
|
|
| 1840 |
|
|
/* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
|
| 1841 |
|
|
unrolling factor. */
|
| 1842 |
|
|
orig_vec_stmts_num = group_size *
|
| 1843 |
|
|
SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
|
| 1844 |
|
|
if (orig_vec_stmts_num == 1)
|
| 1845 |
|
|
only_one_vec = true;
|
| 1846 |
|
|
|
| 1847 |
|
|
/* Number of copies is determined by the final vectorization factor
|
| 1848 |
|
|
relatively to SLP_NODE_INSTANCE unrolling factor. */
|
| 1849 |
|
|
ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
|
| 1850 |
|
|
|
| 1851 |
|
|
/* Generate permutation masks for every NODE. Number of masks for each NODE
|
| 1852 |
|
|
is equal to GROUP_SIZE.
|
| 1853 |
|
|
E.g., we have a group of three nodes with three loads from the same
|
| 1854 |
|
|
location in each node, and the vector size is 4. I.e., we have a
|
| 1855 |
|
|
a0b0c0a1b1c1... sequence and we need to create the following vectors:
|
| 1856 |
|
|
for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
|
| 1857 |
|
|
for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
|
| 1858 |
|
|
...
|
| 1859 |
|
|
|
| 1860 |
|
|
The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
|
| 1861 |
|
|
scpecific type, e.g., in bytes for Altivec.
|
| 1862 |
|
|
The last mask is illegal since we assume two operands for permute
|
| 1863 |
|
|
operation, and the mask element values can't be outside that range. Hence,
|
| 1864 |
|
|
the last mask must be converted into {2,5,5,5}.
|
| 1865 |
|
|
For the first two permutations we need the first and the second input
|
| 1866 |
|
|
vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
|
| 1867 |
|
|
we need the second and the third vectors: {b1,c1,a2,b2} and
|
| 1868 |
|
|
{c2,a3,b3,c3}. */
|
| 1869 |
|
|
|
| 1870 |
|
|
for (i = 0;
|
| 1871 |
|
|
VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
|
| 1872 |
|
|
i, node);
|
| 1873 |
|
|
i++)
|
| 1874 |
|
|
{
|
| 1875 |
|
|
scalar_index = 0;
|
| 1876 |
|
|
index = 0;
|
| 1877 |
|
|
vect_stmts_counter = 0;
|
| 1878 |
|
|
vec_index = 0;
|
| 1879 |
|
|
first_vec_index = vec_index++;
|
| 1880 |
|
|
if (only_one_vec)
|
| 1881 |
|
|
second_vec_index = first_vec_index;
|
| 1882 |
|
|
else
|
| 1883 |
|
|
second_vec_index = vec_index++;
|
| 1884 |
|
|
|
| 1885 |
|
|
for (j = 0; j < unroll_factor; j++)
|
| 1886 |
|
|
{
|
| 1887 |
|
|
for (k = 0; k < group_size; k++)
|
| 1888 |
|
|
{
|
| 1889 |
|
|
first_mask_element = (i + j * group_size) * scale;
|
| 1890 |
|
|
for (m = 0; m < scale; m++)
|
| 1891 |
|
|
{
|
| 1892 |
|
|
if (!vect_get_mask_element (stmt, first_mask_element, m,
|
| 1893 |
|
|
mask_nunits, only_one_vec, index, mask,
|
| 1894 |
|
|
¤t_mask_element, &need_next_vector))
|
| 1895 |
|
|
return false;
|
| 1896 |
|
|
|
| 1897 |
|
|
mask[index++] = current_mask_element;
|
| 1898 |
|
|
}
|
| 1899 |
|
|
|
| 1900 |
|
|
if (index == mask_nunits)
|
| 1901 |
|
|
{
|
| 1902 |
|
|
tree mask_vec = NULL;
|
| 1903 |
|
|
|
| 1904 |
|
|
while (--index >= 0)
|
| 1905 |
|
|
{
|
| 1906 |
|
|
tree t = build_int_cst (mask_element_type, mask[index]);
|
| 1907 |
|
|
mask_vec = tree_cons (NULL, t, mask_vec);
|
| 1908 |
|
|
}
|
| 1909 |
|
|
mask_vec = build_vector (mask_type, mask_vec);
|
| 1910 |
|
|
index = 0;
|
| 1911 |
|
|
|
| 1912 |
|
|
if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
|
| 1913 |
|
|
mask_vec))
|
| 1914 |
|
|
{
|
| 1915 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 1916 |
|
|
{
|
| 1917 |
|
|
fprintf (vect_dump, "unsupported vect permute ");
|
| 1918 |
|
|
print_generic_expr (vect_dump, mask_vec, 0);
|
| 1919 |
|
|
}
|
| 1920 |
|
|
free (mask);
|
| 1921 |
|
|
return false;
|
| 1922 |
|
|
}
|
| 1923 |
|
|
|
| 1924 |
|
|
if (!analyze_only)
|
| 1925 |
|
|
{
|
| 1926 |
|
|
if (need_next_vector)
|
| 1927 |
|
|
{
|
| 1928 |
|
|
first_vec_index = second_vec_index;
|
| 1929 |
|
|
second_vec_index = vec_index;
|
| 1930 |
|
|
}
|
| 1931 |
|
|
|
| 1932 |
|
|
next_scalar_stmt = VEC_index (gimple,
|
| 1933 |
|
|
SLP_TREE_SCALAR_STMTS (node), scalar_index++);
|
| 1934 |
|
|
|
| 1935 |
|
|
vect_create_mask_and_perm (stmt, next_scalar_stmt,
|
| 1936 |
|
|
mask_vec, first_vec_index, second_vec_index,
|
| 1937 |
|
|
gsi, node, builtin_decl, vectype, dr_chain,
|
| 1938 |
|
|
ncopies, vect_stmts_counter++);
|
| 1939 |
|
|
}
|
| 1940 |
|
|
}
|
| 1941 |
|
|
}
|
| 1942 |
|
|
}
|
| 1943 |
|
|
}
|
| 1944 |
|
|
|
| 1945 |
|
|
free (mask);
|
| 1946 |
|
|
return true;
|
| 1947 |
|
|
}
|
| 1948 |
|
|
|
| 1949 |
|
|
|
| 1950 |
|
|
|
| 1951 |
|
|
/* Vectorize SLP instance tree in postorder. */
|
| 1952 |
|
|
|
| 1953 |
|
|
static bool
|
| 1954 |
|
|
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
|
| 1955 |
|
|
unsigned int vectorization_factor)
|
| 1956 |
|
|
{
|
| 1957 |
|
|
gimple stmt;
|
| 1958 |
|
|
bool strided_store, is_store;
|
| 1959 |
|
|
gimple_stmt_iterator si;
|
| 1960 |
|
|
stmt_vec_info stmt_info;
|
| 1961 |
|
|
unsigned int vec_stmts_size, nunits, group_size;
|
| 1962 |
|
|
tree vectype;
|
| 1963 |
|
|
int i;
|
| 1964 |
|
|
slp_tree loads_node;
|
| 1965 |
|
|
|
| 1966 |
|
|
if (!node)
|
| 1967 |
|
|
return false;
|
| 1968 |
|
|
|
| 1969 |
|
|
vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
|
| 1970 |
|
|
vectorization_factor);
|
| 1971 |
|
|
vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
|
| 1972 |
|
|
vectorization_factor);
|
| 1973 |
|
|
|
| 1974 |
|
|
stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
|
| 1975 |
|
|
stmt_info = vinfo_for_stmt (stmt);
|
| 1976 |
|
|
|
| 1977 |
|
|
/* VECTYPE is the type of the destination. */
|
| 1978 |
|
|
vectype = get_vectype_for_scalar_type (TREE_TYPE (gimple_assign_lhs (stmt)));
|
| 1979 |
|
|
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
|
| 1980 |
|
|
group_size = SLP_INSTANCE_GROUP_SIZE (instance);
|
| 1981 |
|
|
|
| 1982 |
|
|
/* For each SLP instance calculate number of vector stmts to be created
|
| 1983 |
|
|
for the scalar stmts in each node of the SLP tree. Number of vector
|
| 1984 |
|
|
elements in one vector iteration is the number of scalar elements in
|
| 1985 |
|
|
one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
|
| 1986 |
|
|
size. */
|
| 1987 |
|
|
vec_stmts_size = (vectorization_factor * group_size) / nunits;
|
| 1988 |
|
|
|
| 1989 |
|
|
/* In case of load permutation we have to allocate vectorized statements for
|
| 1990 |
|
|
all the nodes that participate in that permutation. */
|
| 1991 |
|
|
if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
|
| 1992 |
|
|
{
|
| 1993 |
|
|
for (i = 0;
|
| 1994 |
|
|
VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
|
| 1995 |
|
|
i++)
|
| 1996 |
|
|
{
|
| 1997 |
|
|
if (!SLP_TREE_VEC_STMTS (loads_node))
|
| 1998 |
|
|
{
|
| 1999 |
|
|
SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
|
| 2000 |
|
|
vec_stmts_size);
|
| 2001 |
|
|
SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
|
| 2002 |
|
|
}
|
| 2003 |
|
|
}
|
| 2004 |
|
|
}
|
| 2005 |
|
|
|
| 2006 |
|
|
if (!SLP_TREE_VEC_STMTS (node))
|
| 2007 |
|
|
{
|
| 2008 |
|
|
SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
|
| 2009 |
|
|
SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
|
| 2010 |
|
|
}
|
| 2011 |
|
|
|
| 2012 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 2013 |
|
|
{
|
| 2014 |
|
|
fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
|
| 2015 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 2016 |
|
|
}
|
| 2017 |
|
|
|
| 2018 |
|
|
/* Loads should be inserted before the first load. */
|
| 2019 |
|
|
if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
|
| 2020 |
|
|
&& STMT_VINFO_STRIDED_ACCESS (stmt_info)
|
| 2021 |
|
|
&& !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
|
| 2022 |
|
|
si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
|
| 2023 |
|
|
else
|
| 2024 |
|
|
si = gsi_for_stmt (stmt);
|
| 2025 |
|
|
|
| 2026 |
|
|
is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
|
| 2027 |
|
|
if (is_store)
|
| 2028 |
|
|
{
|
| 2029 |
|
|
if (DR_GROUP_FIRST_DR (stmt_info))
|
| 2030 |
|
|
/* If IS_STORE is TRUE, the vectorization of the
|
| 2031 |
|
|
interleaving chain was completed - free all the stores in
|
| 2032 |
|
|
the chain. */
|
| 2033 |
|
|
vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
|
| 2034 |
|
|
else
|
| 2035 |
|
|
/* FORNOW: SLP originates only from strided stores. */
|
| 2036 |
|
|
gcc_unreachable ();
|
| 2037 |
|
|
|
| 2038 |
|
|
return true;
|
| 2039 |
|
|
}
|
| 2040 |
|
|
|
| 2041 |
|
|
/* FORNOW: SLP originates only from strided stores. */
|
| 2042 |
|
|
return false;
|
| 2043 |
|
|
}
|
| 2044 |
|
|
|
| 2045 |
|
|
|
| 2046 |
|
|
bool
|
| 2047 |
|
|
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
|
| 2048 |
|
|
{
|
| 2049 |
|
|
VEC (slp_instance, heap) *slp_instances;
|
| 2050 |
|
|
slp_instance instance;
|
| 2051 |
|
|
unsigned int i, vf;
|
| 2052 |
|
|
bool is_store = false;
|
| 2053 |
|
|
|
| 2054 |
|
|
if (loop_vinfo)
|
| 2055 |
|
|
{
|
| 2056 |
|
|
slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
|
| 2057 |
|
|
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
| 2058 |
|
|
}
|
| 2059 |
|
|
else
|
| 2060 |
|
|
{
|
| 2061 |
|
|
slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
|
| 2062 |
|
|
vf = 1;
|
| 2063 |
|
|
}
|
| 2064 |
|
|
|
| 2065 |
|
|
for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
|
| 2066 |
|
|
{
|
| 2067 |
|
|
/* Schedule the tree of INSTANCE. */
|
| 2068 |
|
|
is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
|
| 2069 |
|
|
instance, vf);
|
| 2070 |
|
|
if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
|
| 2071 |
|
|
|| vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
|
| 2072 |
|
|
fprintf (vect_dump, "vectorizing stmts using SLP.");
|
| 2073 |
|
|
}
|
| 2074 |
|
|
|
| 2075 |
|
|
return is_store;
|
| 2076 |
|
|
}
|
| 2077 |
|
|
|
| 2078 |
|
|
|
| 2079 |
|
|
/* Vectorize the basic block. */
|
| 2080 |
|
|
|
| 2081 |
|
|
void
|
| 2082 |
|
|
vect_slp_transform_bb (basic_block bb)
|
| 2083 |
|
|
{
|
| 2084 |
|
|
bb_vec_info bb_vinfo = vec_info_for_bb (bb);
|
| 2085 |
|
|
gimple_stmt_iterator si;
|
| 2086 |
|
|
|
| 2087 |
|
|
gcc_assert (bb_vinfo);
|
| 2088 |
|
|
|
| 2089 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 2090 |
|
|
fprintf (vect_dump, "SLPing BB\n");
|
| 2091 |
|
|
|
| 2092 |
|
|
for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
|
| 2093 |
|
|
{
|
| 2094 |
|
|
gimple stmt = gsi_stmt (si);
|
| 2095 |
|
|
stmt_vec_info stmt_info;
|
| 2096 |
|
|
|
| 2097 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 2098 |
|
|
{
|
| 2099 |
|
|
fprintf (vect_dump, "------>SLPing statement: ");
|
| 2100 |
|
|
print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
|
| 2101 |
|
|
}
|
| 2102 |
|
|
|
| 2103 |
|
|
stmt_info = vinfo_for_stmt (stmt);
|
| 2104 |
|
|
gcc_assert (stmt_info);
|
| 2105 |
|
|
|
| 2106 |
|
|
/* Schedule all the SLP instances when the first SLP stmt is reached. */
|
| 2107 |
|
|
if (STMT_SLP_TYPE (stmt_info))
|
| 2108 |
|
|
{
|
| 2109 |
|
|
vect_schedule_slp (NULL, bb_vinfo);
|
| 2110 |
|
|
break;
|
| 2111 |
|
|
}
|
| 2112 |
|
|
}
|
| 2113 |
|
|
|
| 2114 |
|
|
mark_sym_for_renaming (gimple_vop (cfun));
|
| 2115 |
|
|
/* The memory tags and pointers in vectorized statements need to
|
| 2116 |
|
|
have their SSA forms updated. FIXME, why can't this be delayed
|
| 2117 |
|
|
until all the loops have been transformed? */
|
| 2118 |
|
|
update_ssa (TODO_update_ssa);
|
| 2119 |
|
|
|
| 2120 |
|
|
if (vect_print_dump_info (REPORT_DETAILS))
|
| 2121 |
|
|
fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
|
| 2122 |
|
|
|
| 2123 |
|
|
destroy_bb_vec_info (bb_vinfo);
|
| 2124 |
|
|
}
|
| 2125 |
|
|
|