OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [config/] [arm/] [neon-schedgen.ml] - Blame information for rev 749

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 709 jeremybenn
(* Emission of the core of the Cortex-A8 NEON scheduling description.
2
   Copyright (C) 2007, 2010 Free Software Foundation, Inc.
3
   Contributed by CodeSourcery.
4
   This file is part of GCC.
5
 
6
   GCC is free software; you can redistribute it and/or modify it under
7
   the terms of the GNU General Public License as published by the Free
8
   Software Foundation; either version 3, or (at your option) any later
9
   version.
10
 
11
   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
13
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14
   for more details.
15
 
16
   You should have received a copy of the GNU General Public License
17
   along with GCC; see the file COPYING3.  If not see
18
   .
19
*)
20
 
21
(* This scheduling description generator works as follows.
22
   - Each group of instructions has source and destination requirements
23
     specified and a list of cores supported. This is then filtered
24
     and per core scheduler descriptions are generated out.
25
     The reservations generated are prefixed by the name of the
26
     core and the check is performed on the basis of what the tuning
27
     string is. Running this will generate Neon scheduler descriptions
28
     for all cores supported.
29
 
30
     The source requirements may be specified using
31
     Source (the stage at which all source operands not otherwise
32
     described are read), Source_m (the stage at which Rm operands are
33
     read), Source_n (likewise for Rn) and Source_d (likewise for Rd).
34
   - For each group of instructions the earliest stage where a source
35
     operand may be required is calculated.
36
   - Each group of instructions is selected in turn as a producer.
37
     The latencies between this group and every other group are then
38
     calculated, yielding up to four values for each combination:
39
        1. Producer -> consumer Rn latency
40
        2. Producer -> consumer Rm latency
41
        3. Producer -> consumer Rd (as a source) latency
42
        4. Producer -> consumer worst-case latency.
43
     Value 4 is calculated from the destination availability requirements
44
     of the consumer and the earliest source availability requirements
45
     of the producer.
46
   - The largest Value 4 calculated for the current producer is the
47
     worse-case latency, L, for that instruction group.  This value is written
48
     out in a define_insn_reservation for the producer group.
49
   - For each producer and consumer pair, the latencies calculated above
50
     are collated.  The average (of up to four values) is calculated and
51
     if this average is different from the worst-case latency, an
52
     unguarded define_bypass construction is issued for that pair.
53
     (For each pair only one define_bypass construction will be emitted,
54
     and at present we do not emit specific guards.)
55
*)
56
 
57
let find_with_result fn lst =
58
  let rec scan = function
59
      [] -> raise Not_found
60
    | l::ls ->
61
      match fn l with
62
          Some result -> result
63
       | _ -> scan ls in
64
    scan lst
65
 
66
let n1 = 1 and n2 = 2 and n3 = 3 and n4 = 4 and n5 = 5 and n6 = 6
67
    and n7 = 7 and n8 = 8 and n9 = 9
68
 
69
type availability = Source of int
70
                  | Source_n of int
71
                  | Source_m of int
72
                  | Source_d of int
73
                  | Dest of int
74
                  | Dest_n_after of int * int
75
 
76
type guard = Guard_none | Guard_only_m | Guard_only_n | Guard_only_d
77
 
78
(* Reservation behaviors.  All but the last row here correspond to one
79
   pipeline each.  Each constructor will correspond to one
80
   define_reservation.  *)
81
type reservation =
82
  Mul | Mul_2cycle | Mul_4cycle
83
| Shift | Shift_2cycle
84
| ALU | ALU_2cycle
85
| Fmul | Fmul_2cycle
86
| Fadd | Fadd_2cycle
87
(* | VFP *)
88
| Permute of int
89
| Ls of int
90
| Fmul_then_fadd | Fmul_then_fadd_2
91
 
92
type core = CortexA8 | CortexA9
93
let allCores = [CortexA8; CortexA9]
94
let coreStr = function
95
    CortexA8 -> "cortex_a8"
96
  | CortexA9 -> "cortex_a9"
97
 
98
let tuneStr = function
99
    CortexA8 -> "cortexa8"
100
   | CortexA9 -> "cortexa9"
101
 
102
 
103
(* This table must be kept as short as possible by conflating
104
   entries with the same availability behavior.
105
 
106
   First components: instruction group names
107
   Second components: availability requirements, in the order in which
108
   they should appear in the comments in the .md file.
109
   Third components: reservation info
110
   Fourth components: List of supported cores.
111
*)
112
let availability_table = [
113
  (* NEON integer ALU instructions.  *)
114
  (* vbit vbif vbsl vorr vbic vnot vcls vclz vcnt vadd vand vorr
115
     veor vbic vorn ddd qqq *)
116
  "neon_int_1", [Source n2; Dest n3], ALU, allCores;
117
  (* vadd vsub qqd vsub ddd qqq *)
118
  "neon_int_2", [Source_m n1; Source_n n2; Dest n3], ALU, allCores;
119
  (* vsum vneg dd qq vadd vsub qdd *)
120
  "neon_int_3", [Source n1; Dest n3], ALU, allCores;
121
  (* vabs vceqz vcgez vcbtz vclez vcltz vadh vradh vsbh vrsbh dqq *)
122
  (* vhadd vrhadd vqadd vtst ddd qqq *)
123
  "neon_int_4", [Source n2; Dest n4], ALU, allCores;
124
  (* vabd qdd vhsub vqsub vabd vceq vcge vcgt vmax vmin vfmx vfmn ddd ddd *)
125
  "neon_int_5", [Source_m n1; Source_n n2; Dest n4], ALU, allCores;
126
  (* vqneg vqabs dd qq *)
127
  "neon_vqneg_vqabs", [Source n1; Dest n4], ALU, allCores;
128
  (* vmov vmvn *)
129
  "neon_vmov", [Dest n3], ALU, allCores;
130
  (* vaba *)
131
  "neon_vaba", [Source_n n2; Source_m n1; Source_d n3; Dest n6], ALU, allCores;
132
  "neon_vaba_qqq",
133
    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
134
   ALU_2cycle, allCores;
135
  (* vsma *)
136
  "neon_vsma", [Source_m n1; Source_d n3; Dest n6], ALU, allCores;
137
 
138
  (* NEON integer multiply instructions.  *)
139
  (* vmul, vqdmlh, vqrdmlh *)
140
  (* vmul, vqdmul, qdd 16/8 long 32/16 long *)
141
  "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long", [Source n2; Dest n6],
142
   Mul, allCores;
143
  "neon_mul_qqq_8_16_32_ddd_32", [Source n2; Dest_n_after (1, n6)],
144
   Mul_2cycle, allCores;
145
  (* vmul, vqdmul again *)
146
  "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar",
147
    [Source_n n2; Source_m n1; Dest_n_after (1, n6)], Mul_2cycle, allCores;
148
  (* vmla, vmls *)
149
  "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long",
150
    [Source_n n2; Source_m n2; Source_d n3; Dest n6], Mul, allCores;
151
  "neon_mla_qqq_8_16",
152
    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n6)],
153
   Mul_2cycle, allCores;
154
  "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long",
155
    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n6)],
156
   Mul_2cycle, allCores;
157
  "neon_mla_qqq_32_qqd_32_scalar",
158
    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (3, n6)],
159
   Mul_4cycle, allCores;
160
  (* vmul, vqdmulh, vqrdmulh *)
161
  (* vmul, vqdmul *)
162
  "neon_mul_ddd_16_scalar_32_16_long_scalar",
163
    [Source_n n2; Source_m n1; Dest n6], Mul, allCores;
164
  "neon_mul_qqd_32_scalar",
165
    [Source_n n2; Source_m n1; Dest_n_after (3, n6)], Mul_4cycle, allCores;
166
  (* vmla, vmls *)
167
  (* vmla, vmla, vqdmla, vqdmls *)
168
  "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar",
169
    [Source_n n2; Source_m n1; Source_d n3; Dest n6], Mul, allCores;
170
 
171
  (* NEON integer shift instructions.  *)
172
  (* vshr/vshl immediate, vshr_narrow, vshl_vmvh, vsli_vsri_ddd *)
173
  "neon_shift_1", [Source n1; Dest n3], Shift, allCores;
174
  (* vqshl, vrshr immediate; vqshr, vqmov, vrshr, vqrshr narrow, allCores;
175
     vqshl_vrshl_vqrshl_ddd *)
176
  "neon_shift_2", [Source n1; Dest n4], Shift, allCores;
177
  (* vsli, vsri and vshl for qqq *)
178
  "neon_shift_3", [Source n1; Dest_n_after (1, n3)], Shift_2cycle, allCores;
179
  "neon_vshl_ddd", [Source n1; Dest n1], Shift, allCores;
180
  "neon_vqshl_vrshl_vqrshl_qqq", [Source n1; Dest_n_after (1, n4)],
181
    Shift_2cycle, allCores;
182
  "neon_vsra_vrsra", [Source_m n1; Source_d n3; Dest n6], Shift, allCores;
183
 
184
  (* NEON floating-point instructions.  *)
185
  (* vadd, vsub, vabd, vmul, vceq, vcge, vcgt, vcage, vcagt, vmax, vmin *)
186
  (* vabs, vneg, vceqz, vcgez, vcgtz, vclez, vcltz, vrecpe, vrsqrte, vcvt *)
187
  "neon_fp_vadd_ddd_vabs_dd", [Source n2; Dest n5], Fadd, allCores;
188
  "neon_fp_vadd_qqq_vabs_qq", [Source n2; Dest_n_after (1, n5)],
189
    Fadd_2cycle, allCores;
190
  (* vsum, fvmx, vfmn *)
191
  "neon_fp_vsum", [Source n1; Dest n5], Fadd, allCores;
192
  "neon_fp_vmul_ddd", [Source_n n2; Source_m n1; Dest n5], Fmul, allCores;
193
  "neon_fp_vmul_qqd", [Source_n n2; Source_m n1; Dest_n_after (1, n5)],
194
    Fmul_2cycle, allCores;
195
  (* vmla, vmls *)
196
  "neon_fp_vmla_ddd",
197
    [Source_n n2; Source_m n2; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
198
  "neon_fp_vmla_qqq",
199
    [Source_n n2; Source_m n2; Source_d n3; Dest_n_after (1, n9)],
200
    Fmul_then_fadd_2, allCores;
201
  "neon_fp_vmla_ddd_scalar",
202
    [Source_n n2; Source_m n1; Source_d n3; Dest n9], Fmul_then_fadd, allCores;
203
  "neon_fp_vmla_qqq_scalar",
204
    [Source_n n2; Source_m n1; Source_d n3; Dest_n_after (1, n9)],
205
    Fmul_then_fadd_2, allCores;
206
  "neon_fp_vrecps_vrsqrts_ddd", [Source n2; Dest n9], Fmul_then_fadd, allCores;
207
  "neon_fp_vrecps_vrsqrts_qqq", [Source n2; Dest_n_after (1, n9)],
208
    Fmul_then_fadd_2, allCores;
209
 
210
  (* NEON byte permute instructions.  *)
211
  (* vmov; vtrn and vswp for dd; vzip for dd; vuzp for dd; vrev; vext for dd *)
212
  "neon_bp_simple", [Source n1; Dest n2], Permute 1, allCores;
213
  (* vswp for qq; vext for qqq; vtbl with {Dn} or {Dn, Dn1}, allCores;
214
     similarly for vtbx *)
215
  "neon_bp_2cycle", [Source n1; Dest_n_after (1, n2)], Permute 2, allCores;
216
  (* all the rest *)
217
  "neon_bp_3cycle", [Source n1; Dest_n_after (2, n2)], Permute 3, allCores;
218
 
219
  (* NEON load/store instructions.  *)
220
  "neon_ldr", [Dest n1], Ls 1, allCores;
221
  "neon_str", [Source n1], Ls 1, allCores;
222
  "neon_vld1_1_2_regs", [Dest_n_after (1, n1)], Ls 2, allCores;
223
  "neon_vld1_3_4_regs", [Dest_n_after (2, n1)], Ls 3, allCores;
224
  "neon_vld2_2_regs_vld1_vld2_all_lanes", [Dest_n_after (1, n2)], Ls 2, allCores;
225
  "neon_vld2_4_regs", [Dest_n_after (2, n2)], Ls 3, allCores;
226
  "neon_vld3_vld4", [Dest_n_after (3, n2)], Ls 4, allCores;
227
  "neon_vst1_1_2_regs_vst2_2_regs", [Source n1], Ls 2, allCores;
228
  "neon_vst1_3_4_regs", [Source n1], Ls 3, allCores;
229
  "neon_vst2_4_regs_vst3_vst4", [Source n1], Ls 4, allCores;
230
  "neon_vst3_vst4", [Source n1], Ls 4, allCores;
231
  "neon_vld1_vld2_lane", [Source n1; Dest_n_after (2, n2)], Ls 3, allCores;
232
  "neon_vld3_vld4_lane", [Source n1; Dest_n_after (4, n2)], Ls 5, allCores;
233
  "neon_vst1_vst2_lane", [Source n1], Ls 2, allCores;
234
  "neon_vst3_vst4_lane", [Source n1], Ls 3, allCores;
235
  "neon_vld3_vld4_all_lanes", [Dest_n_after (1, n2)], Ls 3, allCores;
236
 
237
  (* NEON register transfer instructions.  *)
238
  "neon_mcr", [Dest n2], Permute 1, allCores;
239
  "neon_mcr_2_mcrr", [Dest n2], Permute 2, allCores;
240
  (* MRC instructions are in the .tpl file.  *)
241
]
242
 
243
(* Augment the tuples in the availability table with an extra component
244
   that describes the earliest stage where a source operand may be
245
   required.  (It is also possible that an entry in the table has no
246
   source requirements.)  *)
247
let calculate_sources =
248
  List.map (fun (name, avail, res, cores) ->
249
              let earliest_stage =
250
                List.fold_left
251
                  (fun cur -> fun info ->
252
                     match info with
253
                       Source stage
254
                     | Source_n stage
255
                     | Source_m stage
256
                     | Source_d stage ->
257
                         (match cur with
258
                           None -> Some stage
259
                         | Some stage' when stage < stage' -> Some stage
260
                         | _ -> cur)
261
                     | _ -> cur) None avail
262
              in
263
                (name, avail, res, earliest_stage))
264
 
265
(* Find the stage, if any, at the end of which a group produces a result.  *)
266
let find_dest (attr, avail, _, _) =
267
  try
268
    find_with_result
269
      (fun av -> match av with
270
                   Dest st -> Some (Some st)
271
                 | Dest_n_after (after, st) -> Some (Some (after + st))
272
                 | _ -> None) avail
273
  with Not_found -> None
274
 
275
(* Find the worst-case latency between a producer and a consumer.  *)
276
let worst_case_latency producer (_, _, _, earliest_required) =
277
  let dest = find_dest producer in
278
    match earliest_required, dest with
279
      None, _ ->
280
        (* The consumer doesn't have any source requirements.  *)
281
        None
282
    | _, None ->
283
        (* The producer doesn't produce any results (e.g. a store insn).  *)
284
        None
285
    | Some consumed, Some produced -> Some (produced - consumed + 1)
286
 
287
(* Helper function for below.  *)
288
let latency_calc f producer (_, avail, _, _) =
289
  try
290
    let source_avail = find_with_result f avail in
291
      match find_dest producer with
292
        None ->
293
          (* The producer does not produce a result.  *)
294
          Some 0
295
      | Some produced ->
296
          let latency = produced - source_avail + 1 in
297
            (* Latencies below zero are raised to zero since we don't have
298
               delay slots.  *)
299
            if latency < 0 then Some 0 else Some latency
300
  with Not_found -> None
301
 
302
(* Find any Rm latency between a producer and a consumer.  If no
303
   Rm source requirement is explicitly specified for the consumer,
304
   return "positive infinity".  Also return "positive infinity" if
305
   the latency matches the supplied worst-case latency for this
306
   producer.  *)
307
let get_m_latency producer consumer =
308
  match latency_calc (fun av -> match av with Source_m stage -> Some stage
309
                                            | _ -> None) producer consumer
310
  with None -> [] | Some latency -> [(Guard_only_m, latency)]
311
 
312
(* Likewise for Rn.  *)
313
let get_n_latency producer consumer =
314
  match latency_calc (fun av -> match av with Source_n stage -> Some stage
315
                                            | _ -> None) producer consumer
316
  with None -> [] | Some latency -> [(Guard_only_n, latency)]
317
 
318
(* Likewise for Rd.  *)
319
let get_d_latency producer consumer =
320
  match
321
    latency_calc (fun av -> match av with Source_d stage -> Some stage
322
                                        | _ -> None) producer consumer
323
  with None -> [] | Some latency -> [(Guard_only_d, latency)]
324
 
325
(* Given a producer and a consumer, work out the latency of the producer
326
   to the consumer in each of the four cases (availability information
327
   permitting) identified at the top of this file.  Return the
328
   consumer, the worst-case unguarded latency and any guarded latencies.  *)
329
let calculate_latencies producer consumer =
330
  let worst = worst_case_latency producer consumer in
331
  let m_latency = get_m_latency producer consumer in
332
  let n_latency = get_n_latency producer consumer in
333
  let d_latency = get_d_latency producer consumer in
334
    (consumer, worst, m_latency @ n_latency @ d_latency)
335
 
336
(* Helper function for below.  *)
337
let pick_latency largest worst guards =
338
  let guards =
339
    match worst with
340
      None -> guards
341
    | Some worst -> (Guard_none, worst) :: guards
342
  in
343
  if List.length guards = 0 then None else
344
    let total_latency =
345
      List.fold_left (fun acc -> fun (_, latency) -> acc + latency) 0 guards
346
    in
347
    let average_latency = (float_of_int total_latency) /.
348
                          (float_of_int (List.length guards)) in
349
    let rounded_latency = int_of_float (ceil average_latency) in
350
      if rounded_latency = largest then None
351
      else Some (Guard_none, rounded_latency)
352
 
353
(* Collate all bypasses for a particular producer as required in
354
   worst_case_latencies_and_bypasses.  (By this stage there is a maximum
355
   of one bypass from this producer to any particular consumer listed
356
   in LATENCIES.)  Use a hash table to collate bypasses with the
357
   same latency and guard.  *)
358
let collate_bypasses (producer_name, _, _, _) largest latencies core =
359
  let ht = Hashtbl.create 42 in
360
  let keys = ref [] in
361
    List.iter (
362
      fun ((consumer, _, _, _), worst, guards) ->
363
        (* Find out which latency to use.  Ignoring latencies that match
364
           the *overall* worst-case latency for this producer (which will
365
           be in define_insn_reservation), we have to examine:
366
           1. the latency with no guard between this producer and this
367
              consumer; and
368
           2. any guarded latency.  *)
369
        let guard_latency_opt = pick_latency largest worst guards in
370
          match guard_latency_opt with
371
            None -> ()
372
          | Some (guard, latency) ->
373
            begin
374
              (if (try ignore (Hashtbl.find ht (guard, latency)); false
375
                   with Not_found -> true) then
376
                 keys := (guard, latency) :: !keys);
377
              Hashtbl.add ht (guard, latency) ((coreStr core) ^ "_" ^ consumer)
378
            end
379
    ) latencies;
380
    (* The hash table now has bypasses collated so that ones with the
381
       same latency and guard have the same keys.  Walk through all the
382
       keys, extract the associated bypasses, and concatenate the names
383
       of the consumers for each bypass.  *)
384
    List.map (
385
      fun ((guard, latency) as key) ->
386
        let consumers = Hashtbl.find_all ht key in
387
          (producer_name,
388
           String.concat ",\\\n               " consumers,
389
           latency,
390
           guard)
391
      ) !keys
392
 
393
(* For every producer, find the worst-case latency between it and
394
   *any* consumer.  Also determine (if such a thing exists) the
395
   lowest-latency bypass from each producer to each consumer.  Group
396
   the output in such a way that all bypasses with the same producer
397
   and latency are together, and so that bypasses with the worst-case
398
   latency are ignored.  *)
399
let worst_case_latencies_and_bypasses core =
400
  let rec f (worst_acc, bypasses_acc) prev xs =
401
    match xs with
402
      [] -> (worst_acc, bypasses_acc)
403
    | ((producer_name, producer_avail, res_string, _) as producer)::next ->
404
      (* For this particular producer, work out the latencies between
405
         it and every consumer.  *)
406
      let latencies =
407
        List.fold_left (fun acc -> fun consumer ->
408
                          (calculate_latencies producer consumer) :: acc)
409
                       [] (prev @ xs)
410
      in
411
        (* Now work out what the overall worst case latency was for this
412
           particular producer.  *)
413
        match latencies with
414
          [] -> assert false
415
        | _ ->
416
          let comp_fn (_, l1, _) (_, l2, _) =
417
            if l1 > l2 then -1 else if l1 = l2 then 0 else 1
418
          in
419
          let largest =
420
            match List.hd (List.sort comp_fn latencies) with
421
              (_, None, _) -> 0 (* Producer has no consumers. *)
422
            | (_, Some worst, _) -> worst
423
          in
424
          (* Having got the largest latency, collect all bypasses for
425
             this producer and filter out those with that larger
426
             latency.  Record the others for later emission.  *)
427
          let bypasses = collate_bypasses producer largest latencies core in
428
            (* Go on to process remaining producers, having noted
429
               the result for this one.  *)
430
            f ((producer_name, producer_avail, largest,
431
                res_string) :: worst_acc,
432
               bypasses @ bypasses_acc)
433
              (prev @ [producer]) next
434
  in
435
    f ([], []) []
436
 
437
(* Emit a helpful comment for a define_insn_reservation.  *)
438
let write_comment producer avail =
439
  let seen_source = ref false in
440
  let describe info =
441
    let read = if !seen_source then "" else "read " in
442
    match info with
443
      Source stage ->
444
        seen_source := true;
445
        Printf.printf "%stheir source operands at N%d" read stage
446
    | Source_n stage ->
447
        seen_source := true;
448
        Printf.printf "%stheir (D|Q)n operands at N%d" read stage
449
    | Source_m stage ->
450
        seen_source := true;
451
        Printf.printf "%stheir (D|Q)m operands at N%d" read stage
452
    | Source_d stage ->
453
        Printf.printf "%stheir (D|Q)d operands at N%d" read stage
454
    | Dest stage ->
455
        Printf.printf "produce a result at N%d" stage
456
    | Dest_n_after (after, stage) ->
457
        Printf.printf "produce a result at N%d on cycle %d" stage (after + 1)
458
  in
459
    Printf.printf ";; Instructions using this reservation ";
460
    let rec f infos x =
461
      let sep = if x mod 2 = 1 then "" else "\n;;" in
462
      match infos with
463
        [] -> assert false
464
      | [info] -> describe info; Printf.printf ".\n"
465
      | info::(_::[] as infos) ->
466
          describe info; Printf.printf ", and%s " sep; f infos (x+1)
467
      | info::infos -> describe info; Printf.printf ",%s " sep; f infos (x+1)
468
    in
469
      f avail 0
470
 
471
 
472
(* Emit a define_insn_reservation for each producer.  The latency
473
   written in will be its worst-case latency.  *)
474
let emit_insn_reservations core =
475
  let corestring = coreStr core in
476
  let tunestring = tuneStr core
477
  in  List.iter (
478
     fun (producer, avail, latency, reservation) ->
479
        write_comment producer avail;
480
        Printf.printf "(define_insn_reservation \"%s_%s\" %d\n"
481
            corestring producer latency;
482
            Printf.printf "  (and (eq_attr \"tune\" \"%s\")\n" tunestring;
483
        Printf.printf "       (eq_attr \"neon_type\" \"%s\"))\n" producer;
484
        let str =
485
          match reservation with
486
            Mul -> "dp" | Mul_2cycle -> "dp_2" | Mul_4cycle -> "dp_4"
487
          | Shift -> "dp" | Shift_2cycle -> "dp_2"
488
          | ALU -> "dp" | ALU_2cycle -> "dp_2"
489
          | Fmul -> "dp" | Fmul_2cycle -> "dp_2"
490
          | Fadd -> "fadd" | Fadd_2cycle -> "fadd_2"
491
          | Ls 1 -> "ls"
492
          | Ls n -> "ls_" ^ (string_of_int n)
493
          | Permute 1 -> "perm"
494
          | Permute n -> "perm_" ^ (string_of_int n)
495
          | Fmul_then_fadd -> "fmul_then_fadd"
496
          | Fmul_then_fadd_2 -> "fmul_then_fadd_2"
497
        in
498
          Printf.printf "  \"%s_neon_%s\")\n\n" corestring str
499
    )
500
 
501
(* Given a guard description, return the name of the C function to
502
   be used as the guard for define_bypass.  *)
503
let guard_fn g =
504
  match g with
505
    Guard_only_m -> "arm_neon_only_m_dependency"
506
  | Guard_only_n -> "arm_neon_only_n_dependency"
507
  | Guard_only_d -> "arm_neon_only_d_dependency"
508
  | Guard_none -> assert false
509
 
510
(* Emit a define_bypass for each bypass.  *)
511
let emit_bypasses core =
512
  List.iter (
513
      fun (producer, consumers, latency, guard) ->
514
        Printf.printf "(define_bypass %d \"%s_%s\"\n"
515
        latency (coreStr core) producer;
516
 
517
        if guard = Guard_none then
518
          Printf.printf "               \"%s\")\n\n" consumers
519
        else
520
          begin
521
            Printf.printf "               \"%s\"\n" consumers;
522
            Printf.printf "               \"%s\")\n\n" (guard_fn guard)
523
          end
524
    )
525
 
526
 
527
let calculate_per_core_availability_table core availability_table =
528
  let table = calculate_sources availability_table in
529
  let worst_cases, bypasses = worst_case_latencies_and_bypasses core table in
530
    emit_insn_reservations core (List.rev worst_cases);
531
    Printf.printf ";; Exceptions to the default latencies.\n\n";
532
    emit_bypasses core bypasses
533
 
534
let calculate_core_availability_table core availability_table =
535
let filter_core = List.filter (fun (_, _, _, cores)
536
                                   -> List.exists ((=) core) cores)
537
in calculate_per_core_availability_table core (filter_core availability_table)
538
 
539
 
540
(* Program entry point.  *)
541
let main =
542
  List.map (fun core -> calculate_core_availability_table
543
                core availability_table) allCores

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.