OpenCores
URL https://opencores.org/ocsvn/hicovec/hicovec/trunk

Subversion Repositories hicovec

[/] [hicovec/] [branches/] [avendor/] [assembler/] [cputest.txt] - Blame information for rev 12

Details | Compare with Previous | View Log

Line No. Rev Author Line
1 2 hmanske
; ************************************************************************
2
; * CPU validation program                                               *
3
; ************************************************************************
4
; * Executes all possible commands of the target cpu to be sure that it  *
5
; * works properly.                                                      *
6
; *                                                                      *
7
; * If an error occurs, the processor will run into a halt command.      *
8
; * Otherwise it will continue execution to the finish symbol and loop   *
9
; * there infinitely                                                     *
10
; *                                                                      *
11
; ************************************************************************
12
; * Author:     Harald Manske, haraldmanske@gmx.de                       *
13
; * Version:    1.0                                                      *
14 4 hmanske
; * Platform:   HiCoVec (highly configurable vector processor)           *
15 2 hmanske
; ************************************************************************
16
 
17
; configuration of the program
18
                EQU n 8             ;requires n >= 4
19
                EQU k 20            ;requires k = 16  NOTE(review): value is 20 but comment says 16 - confirm
20
                EQU use_shuffle 0   ;requires shuffle unit
21
                EQU use_mul 0       ;requires both scalar and vector multiplication
22
                EQU use_vmolr 0     ;requires vmol and vmor commands (32 bit width)
23
 
24
                JMP 0+START         ; jump over the data area to the program start
25
                HALT
26
 
27
 
28
;test data
29
                ORG $10
30
DATA_V:         DC $B2D463BC        ; vector input data (max k=32)
31
DATA_V2:        DC $AA67Df42
32
                DC $A3B78EF3
33
                DC $4E6AC974
34
                DC $8FEE432E
35
                DC $FF3AB876
36
                DC $43B57621
37
                DC $8E6AC974 ;8
38
                DC $7FF3ACD3
39
                DC $921BC4D1
40
                DC $637DCF67
41
                DC $7897AABD
42
                DC $973EE234
43
                DC $005C6E39
44
                DC $A1BC7A35
45
                DC $7FF8334E ;16
46
                DC $A3486CDF
47
                DC $94CA193B
48
                DC $445E97C1
49
                DC $8836D35E
50
                DC $A9B2C45F
51
                DC $97A36BFF
52
                DC $79835987
53
                DC $DCBEDFEA
54
                DC $975CC213
55
                DC $39DA322B
56
                DC $8411DEA9
57
                DC $FFFFFFFF
58
                DC $129DE3AF
59
                DC $7881D74A
60
                DC $AA8D35A8
61
                DC $5247877D
62
 
63
V_RES:          ORG $30             ; vector result data (max k=32)
64
 
65
                ORG $50
66
MASK_HW:        DC $FFFF0000        ; mask to get only high word
67
CMP_VAL:        DC $ABCD            ; value to compare with immediate
68
CMP_VAL2:       DC $FEDC            ; value to compare with immediate
69
DATA_A:         DC $A3BF74E5        ; operand A
70
DATA_B:         DC $03C7A483        ; operand B
71
DATA_FF:        DC $FFFFFFFF        ; operand FF
72
STORE_DEST:     DC $0               ; destination for store command
73
 
74
;results
75
AND_RES:        DC $3872481         ; result: A and B
76
OR_RES:         DC $A3FFF4E7        ; result: A or B
77
XOR_RES:        DC $A078D066        ; result: A xor B
78
ADD_RES:        DC $A7871968        ; result: A + B
79
SUB_RES:        DC $9FF7D062        ; result: A - B
80
INC_RES:        DC $A3BF74E6        ; result: A + 1
81
DEC_RES:        DC $A3BF74E4        ; result: A - 1
82
LSL_RES:        DC $477EE9CA        ; result: lsl A
83
LSR_RES:        DC $51DFBA72        ; result: lsr A
84
MUL_RES:        DC $4B1E852F        ; result A * B (16bit * 16bit)
85
 
86
ROL_RES:        DC $477EE9CA        ; result: rol A, carry not set
87
ROL_RES_C:      DC $477EE9CB        ; result: rol A, carry set
88
 
89
ROR_RES:        DC $51DFBA72        ; result: ror A, carry not set
90
ROR_RES_C:      DC $D1DFBA72        ; result: ror A, carry set
91
 
92
RES_VSHUF_8L:   DC $8EF3AA67        ; result: shuffle vwidth=00 low word
93
RES_VSHUF_8H:   DC $DF42B2D4        ; result: shuffle vwidth=00 high word
94
 
95
;program start
96
START:          NOP                 ; program entry, target of the JMP at the top of the file
97
 
98
;test flag commands and conditional jumps
99
TEST_ZERO:      SEZ                 ; test expects Z set afterwards
100
                JZ 0+TEST_NOTZERO   ; taken on success
101
                HALT                ; error: JZ did not branch
102
TEST_NOTZERO:   CLZ                 ; test expects Z clear afterwards
103
                JZ 0+ERR_SCALAR     ; must not branch
104
                JNZ 0+TEST_CARRY    ; taken on success
105
                HALT                ; error: JNZ did not branch
106
TEST_CARRY:     SEC                 ; test expects C set afterwards
107
                JC 0+TEST_NOTCARRY  ; taken on success
108
                HALT                ; error: JC did not branch
109
TEST_NOTCARRY:  CLC                 ; test expects C clear afterwards
110
                JC 0+ERR_SCALAR     ; must not branch
111
                JNC 0+TEST_FLAGS    ; taken on success
112
                HALT                ; error: JNC did not branch
113
TEST_FLAGS:     SUB 0,0,1           ; 0 - 1: checks below expect Z clear and C set
114
                JZ 0+ERR_SCALAR
115
                JNC 0+ERR_SCALAR
116
                JNZ 0+TEST_FLAGS2
117
                HALT                ; error: JNZ did not branch
118
 TEST_FLAGS2:   ADD 0,0,0           ; 0 + 0: checks below expect C clear and Z set
119
                JC 0+ERR_SCALAR
120
                JZ 0+TEST_LD
121
                HALT                ; error: JZ did not branch
122
 
123
;test load operation
124
TEST_LD:        LD A,0+CMP_VAL      ; load with direct address (CMP_VAL holds $ABCD)
125
                JZ 0+ERR_SCALAR     ; Z was set on entry; a non-zero load must clear it
126
                SUB X,A,$ABCD       ; compare the loaded value with the expected immediate
127
                JC 0+ERR_SCALAR
128
                JZ 0+TEST_LD2
129
                HALT
130
 TEST_LD2:      OR Y,0,CMP_VAL      ; Y = address of CMP_VAL
131
                OR X,0,1            ; X = 1
132
                LD Y,Y+X            ; register+register address: the word after CMP_VAL (CMP_VAL2)
133
                SUB 0,Y,$FEDC       ; CMP_VAL2 holds $FEDC
134
                JNZ 0+ERR_SCALAR
135
                JZ 0+TEST_STORE
136
                HALT
137
 
138
;test store operation
139
TEST_STORE:     LD X,0+DATA_A
140
                LD Y,0+DATA_B
141
                ADD A,X,Y           ; A = DATA_A + DATA_B
142
                ST 0+STORE_DEST,A   ; store with direct address
143
                LD X,0+ADD_RES
144
                LD Y,0+STORE_DEST   ; read the stored word back
145
                SUB 0,X,Y           ; must equal the precomputed ADD_RES
146
                JZ 0+TEST_STORE2
147
                HALT
148
 TEST_STORE2:   OR A,0,$1234
149
                OR X,0,1
150
                OR Y,0,STORE_DEST
151
                DEC Y,Y             ; Y = STORE_DEST - 1, so X+Y addresses STORE_DEST
152
                ST X+Y,A            ; store with register+register address
153
                OR X,0,0
154
                LD X,0+STORE_DEST   ; read back
155
                SUB Y,X,$1234       ; must equal the stored immediate
156
                JNZ 0+ERR_SCALAR
157
 
158
;test arithmetic and logic operations
159
TEST_ADD:       LD X,0+DATA_A
160
                LD Y,0+DATA_B
161
                ADD A,X,Y           ; checks below expect no carry, non-zero, equal to ADD_RES
162
                JC 0+ERR_SCALAR
163
                JZ 0+ERR_SCALAR
164
                LD Y,0+ADD_RES
165
                SUB 0,A,Y
166
                JNZ 0+ERR_SCALAR
167
                JC 0+ERR_SCALAR
168
                LD X, 0+DATA_FF
169
                ADD A,X,2           ; $FFFFFFFF + 2: checks below expect carry set and result 1
170
                JNC 0+ERR_SCALAR
171
                JZ 0+ERR_SCALAR
172
                SUB 0,A,1
173
                JZ 0+TEST_ADC
174
                HALT
175
 
176
TEST_ADC:       LD A,0+DATA_A
177
                LD X,0+DATA_B
178
                CLC
179
                ADC Y,A,X           ; carry clear: result must equal ADD_RES
180
                JZ 0+ERR_SCALAR
181
                JC 0+ERR_SCALAR
182
                LD A,0+ADD_RES
183
                SUB 0,A,Y
184
                JNZ 0+ERR_SCALAR
185
 
186
                LD Y,0+DATA_A
187
                LD X,0+DATA_B
188
                SEC
189
                ADC A,X,Y           ; carry set: result must equal ADD_RES + 1
190
                JZ 0+ERR_SCALAR
191
                JC 0+ERR_SCALAR
192
                SUB A,A,1           ; take the carry-in off again before comparing
193
                LD Y,0+ADD_RES
194
                SUB 0,A,Y
195
                JNZ 0+ERR_SCALAR
196
                JNC 0+TEST_SUB
197
                HALT
198
 
199
TEST_SUB:       LD X,0+DATA_A
200
                LD Y,0+DATA_B
201
                SUB A,X,Y           ; DATA_A - DATA_B: expected SUB_RES with carry clear
202
                JC 0+ERR_SCALAR
203
                LD X,0+SUB_RES
204
                SUB 0,A,X
205
                JNZ 0+ERR_SCALAR
206
                JC 0+ERR_SCALAR
207
                LD X,0+DATA_A
208
                SUB A,Y,X           ; DATA_B - DATA_A: checks below expect carry set and non-zero
209
                JNC 0+ERR_SCALAR
210
                JNZ 0+TEST_SBC
211
                HALT
212
 
213
TEST_SBC:       LD A,0+DATA_A
214
                LD Y,0+DATA_B
215
                CLC
216
                SUB X,A,Y           ; X = reference difference DATA_A - DATA_B
217
                SBC A,A,Y           ; must give the same value as the SUB above
218
                JZ 0+ERR_SCALAR
219
                JC 0+ERR_SCALAR
220
                SUB 0,X,A
221
                JNZ 0+ERR_SCALAR
222
                JC 0+ERR_SCALAR
223
                LD A,0+DATA_A
224
                SEC
225
                SBC A,A,Y           ; carry set: compared below against reference - 1
226
                JZ 0+ERR_SCALAR
227
                JC 0+ERR_SCALAR
228
                SUB X,X,1
229
                SUB 0,A,X
230
                JC 0+ERR_SCALAR
231
                JZ 0+TEST_INC
232
                HALT
233
 
234
TEST_INC:       LD A,0+DATA_A
235
                INC A,A
236
                LD X,0+INC_RES      ; X = precomputed DATA_A + 1
237
                LD Y,0+DATA_A
238
                ADD Y,Y,1           ; Y = DATA_A + 1 computed with ADD
239
                SUB 0,A,X
240
                JNZ 0+ERR_SCALAR
241
                SUB 0,A,Y
242
                JNZ 0+ERR_SCALAR
243
                LD A,0+DATA_FF
244
                INC A,A             ; $FFFFFFFF + 1: checks below expect carry set and zero
245
                JNC 0+ERR_SCALAR
246
                JZ 0+TEST_DEC
247
                HALT
248
 
249
TEST_DEC:       OR A,0,0            ; A = 0
250
                DEC A,A             ; 0 - 1: checks below expect carry set and the value of DATA_FF
251
                JNC 0+ERR_SCALAR
252
                JZ 0+ERR_SCALAR
253
                LD X,0+DATA_FF
254
                SUB 0,A,X
255
                JNZ 0+ERR_SCALAR
256
                JC 0+ERR_SCALAR
257
                LD A,0+DATA_A
258
                DEC A,A
259
                LD Y,0+DEC_RES      ; precomputed DATA_A - 1
260
                SUB 0,A,Y
261
                JC 0+ERR_SCALAR
262
                JZ 0+TEST_AND
263
                HALT
264
 
265
TEST_AND:       LD A,0+DATA_A
266
                LD X,0+DATA_B
267
                AND Y,A,X
268
                LD A,0+AND_RES
269
                SUB 0,Y,A           ; compare with precomputed AND_RES
270
                JC 0+ERR_SCALAR
271
                JNZ 0+ERR_SCALAR
272
                LD A,0+DATA_FF
273
                AND X,Y,A           ; AND with all ones must leave the value unchanged
274
                SUB 0,Y,X
275
                JNZ 0+ERR_SCALAR
276
                OR Y,0,$3456
277
                AND Y,Y,0           ; AND with 0: the test expects Z set
278
                JZ 0+TEST_OR
279
                HALT
280
 
281
TEST_OR:        LD X,0+DATA_A
282
                LD Y,0+DATA_B
283
                OR A,X,Y            ; A = DATA_A or DATA_B
284
                JZ 0+ERR_SCALAR     ; check Z of the OR itself, before LD rewrites the flags
285
                LD X,0+OR_RES
286
                SUB 0,A,X           ; compare with precomputed OR_RES
287
                JNZ 0+ERR_SCALAR
288
                JC 0+ERR_SCALAR
289
                OR A,A,$FF          ; force the low byte to $FF ...
290
                AND A,A,$FF         ; ... then keep only that byte
291
                SUB 0,A,$FF         ; result must be exactly $FF
292
                JC 0+ERR_SCALAR
293
                JZ 0+TEST_XOR
294
                HALT
295
 
296
TEST_XOR:       LD X,0+DATA_A
297
                LD Y,0+DATA_B
298
                XOR A,X,Y
299
                LD X,0+XOR_RES
300
                SUB 0,A,X           ; compare with precomputed XOR_RES
301
                JC 0+ERR_SCALAR
302
                JNZ 0+ERR_SCALAR
303
                LD Y,0+ADD_RES
304
                XOR A,A,Y           ; XOR with a second value: result must differ from XOR_RES
305
                SUB 0,A,X
306
                JZ 0+ERR_SCALAR
307
                XOR A,A,Y           ; XOR with the same value again: must be XOR_RES again
308
                SUB 0,A,X
309
                JZ 0+TEST_LSL
310
                HALT
311
 
312
TEST_LSL:       LD A,0+DATA_A
313
                LSL A,A
314
                JNC 0+ERR_SCALAR    ; the test expects the shift of DATA_A to set carry
315
                LD X,0+LSL_RES
316
                SUB 0,A,X           ; compare with precomputed LSL_RES
317
                JC 0+ERR_SCALAR
318
                JZ 0+TEST_LSR
319
                HALT
320
 
321
 
322
TEST_LSR:       LD A,0+DATA_A
323
                LSR A,A
324
                JNC 0+ERR_SCALAR    ; the test expects the shift of DATA_A to set carry
325
                LD X,0+LSR_RES
326
                SUB 0,X,A           ; compare with precomputed LSR_RES
327
                JC 0+ERR_SCALAR
328
                JZ 0+TEST_ROL
329
                HALT
330
 
331
TEST_ROL:       CLC
332
                LD Y,0+DATA_A
333
                ROL A,Y             ; carry cleared beforehand: expected ROL_RES
334
                JNC 0+ERR_SCALAR
335
                LD X,0+ROL_RES
336
                SUB 0,A,X
337
                JC 0+ERR_SCALAR
338
                JNZ 0+ERR_SCALAR
339
                SEC
340
                LD Y,0+DATA_A
341
                ROL A,Y             ; carry set beforehand: expected ROL_RES_C
342
                JNC 0+ERR_SCALAR
343
                LD X,0+ROL_RES_C
344
                SUB 0,A,X
345
                JC 0+ERR_SCALAR
346
                JZ 0+TEST_ROR
347
                HALT
348
 
349
TEST_ROR:       CLC
350
                LD Y,0+DATA_A
351
                ROR A,Y             ; carry cleared beforehand: expected ROR_RES
352
                JNC 0+ERR_SCALAR
353
                LD X,0+ROR_RES
354
                SUB 0,A,X
355
                JC 0+ERR_SCALAR
356
                JNZ 0+ERR_SCALAR
357
                SEC
358
                LD A,0+DATA_A
359
                ROR A,A             ; carry set beforehand: expected ROR_RES_C
360
                JNC 0+ERR_SCALAR
361
                LD X,0+ROR_RES_C
362
                SUB 0,A,X
363
                JC 0+ERR_SCALAR
364
                JZ 0+TEST_JAL
365
                HALT
366
 
367
TEST_JAL:       JAL A,0+TEST_JAL2   ; jump to TEST_JAL2 with A as the link register operand
368
                HALT                ; error: JAL did not jump
369
 TEST_JAL2:     SUB 0,A,TEST_JAL    ; the test expects A to equal the address of TEST_JAL
370
                JNZ 0+ERR_SCALAR
371
                JZ 0+TEST_MUL
372
 
373
TEST_MUL:       OR A,0,use_mul      ; A = configuration switch use_mul
374
                JZ 0+NO_MUL         ; use_mul = 0: skip the multiplier test
375
                LD X,0+DATA_A
376
                LD Y,0+DATA_B
377
                MUL A,X,Y
378
                JC 0+ERR_SCALAR
379
                JZ 0+ERR_SCALAR
380
                LD Y,0+MUL_RES
381
                SUB 0,A,Y           ; compare with precomputed MUL_RES
382
                JNZ 0+ERR_SCALAR
383
                JC 0+ERR_SCALAR
384
 NO_MUL:        JMP 0+TEST_VLD_ST
385
 
386
;test cooperative commands
387
TEST_VLD_ST:    OR A,0,0            ; A = 0, the fill value
388
                OR Y,0,0            ; Y = word index
389
 VLD_ST_INIT:   ST Y+V_RES,A        ;init V_RES[Y] with 0 (index with Y so all k words are cleared)
390
                INC Y,Y
391
                SUB 0,Y,k
392
                JNZ 0+VLD_ST_INIT
393
 
394
                OR A,0,DATA_V       ;load
395
                VLD R0,0+A
396
 
397
                OR A,0,V_RES
398
                VST 0+A,R0          ;store
399
 
400
                OR Y,0,0
401
 VLD_ST_LOOP:   LD A,Y+V_RES        ;check
402
                LD X,Y+DATA_V
403
                SUB 0,A,X           ; stored word Y must equal source word Y
404
                JNZ 0+ERR_COOP
405
                INC Y,Y
406
                SUB 0,Y,k
407
                JNZ 0+VLD_ST_LOOP
408
                JMP 0+TEST_MOV
409
                HALT
410
 
411
TEST_MOV:       OR A,0,0            ; A = element index
412
 MOV_LOOP:      LD Y,A+DATA_V
413
                MOV R1(A),Y         ;scalar => vector
414
                INC A,A
415
                SUB 0,A,k
416
                JNZ 0+MOV_LOOP
417
 
418
                OR A,0,0
419
                OR X,0,0
420
 MOV_LOOP2:     MOV X,R1(A)         ;vector => scalar
421
                LD Y,A+DATA_V
422
                SUB 0,Y,X           ; element read back must equal DATA_V[A]
423
                JNZ 0+ERR_COOP
424
                INC A,A
425
                SUB 0,A,k
426
                JNZ 0+MOV_LOOP2
427
                JZ 0+TEST_VMOV
428
                HALT
429
 
430
;test vector commands
431
TEST_VMOV:      VMOV R0,R1          ; R1 was filled element by element from DATA_V in TEST_MOV
432
                VMOV R<2>,R0        ; NOTE(review): R<2> presumably addresses R2 (read as R2 below) - confirm
433
                VMOV R3,R<2>
434
 
435
                OR A,0,0
436
 VMOV_LOOP:     LD Y,A+DATA_V       ; Y = reference word for element A
437
 
438
                MOV X,R0(A)
439
                SUB 0,Y,X
440
                JNZ 0+ERR_VECTOR
441
 
442
                MOV X,R2(A)
443
                SUB 0,Y,X
444
                JNZ 0+ERR_VECTOR
445
 
446
                MOV X,R3(A)
447
                SUB 0,Y,X
448
                JNZ 0+ERR_VECTOR
449
 
450
                INC A,A
451
                SUB 0,A,k
452
                JNZ 0+VMOV_LOOP
453
 
454
TEST_MOVA:      LD A,0+DATA_A
455
                MOVA R0,A
456
                OR X,0,V_RES
457
                VST 0+X,R0          ; write R0 to V_RES for checking
458
 
459
                OR X,0,0
460
 MOVA_LOOP:     LD Y,X+V_RES
461
                SUB 0,Y,A           ; the test expects every one of the k words to equal A
462
                JNZ 0+ERR_COOP
463
                INC X,X
464
                SUB 0,X,k
465
                JNZ 0+MOVA_LOOP
466
 
467
;test vector alu commands
468
                OR A,0,DATA_V
469
                VLD R0,0+A          ; R0 = vector starting at DATA_V
470
                OR A,0,DATA_V2
471
                VLD R1,0+A          ; R1 = vector starting at DATA_V2 (one word after DATA_V)
472
 
473
TEST_VAND:      VAND.DW R2,R0,R1
474
                OR A,0,V_RES
475
                VST 0+A,R2          ; write the vector result to V_RES
476
 
477
                OR A,0,0
478
 VAND_LOOP:     LD X,A+DATA_V
479
                LD Y,A+DATA_V2
480
                AND X,X,Y           ; scalar reference for element A
481
                LD Y,A+V_RES
482
                SUB 0,X,Y
483
                JNZ 0+ERR_VALU
484
                INC A,A
485
                SUB 0,A,k
486
                JNZ 0+VAND_LOOP
487
 
488
TEST_VOR:       VOR.DW R2,R0,R1
489
                OR A,0,V_RES
490
                VST 0+A,R2          ; write the vector result to V_RES
491
 
492
                OR A,0,0
493
 VOR_LOOP:      LD X,A+DATA_V
494
                LD Y,A+DATA_V2
495
                OR X,X,Y            ; scalar reference for element A
496
                LD Y,A+V_RES
497
                SUB 0,X,Y
498
                JNZ 0+ERR_VALU
499
                INC A,A
500
                SUB 0,A,k
501
                JNZ 0+VOR_LOOP
502
 
503
TEST_VXOR:      VXOR.DW R2,R0,R1
504
                OR A,0,V_RES
505
                VST 0+A,R2          ; write the vector result to V_RES
506
 
507
                OR A,0,0
508
 VXOR_LOOP:     LD X,A+DATA_V
509
                LD Y,A+DATA_V2
510
                XOR X,X,Y           ; scalar reference for element A
511
                LD Y,A+V_RES
512
                SUB 0,X,Y
513
                JNZ 0+ERR_VALU
514
                INC A,A
515
                SUB 0,A,k
516
                JNZ 0+VXOR_LOOP
517
 
518
TEST_VADD:      VADD.DW R2,R0,R1
519
                OR A,0,V_RES
520
                VST 0+A,R2          ; write the vector result to V_RES
521
 
522
                ;32 bit
523
                OR A,0,0
524
 VADD_LOOP_DW:  LD X,A+DATA_V
525
                LD Y,A+DATA_V2
526
                ADD X,X,Y           ; scalar reference for element A
527
                LD Y,A+V_RES
528
                SUB 0,X,Y
529
                JNZ 0+ERR_VALU
530
                INC A,A
531
                SUB 0,A,k
532
                JNZ 0+VADD_LOOP_DW
533
 
534
                ;64 bit
535
                VADD.QW R2,R0,R1
536
                OR A,0,V_RES
537
                VST 0+A,R2
538
 
539
                OR A,0,0
540
 VADD_LOOP_QW:  ST 0+AKKU,A         ;reset carry
541
                OR A,0,0
542
                ST 0+CARRY,A        ; CARRY = 0
543
                LD A,0+AKKU         ; restore the word index saved in AKKU
544
 
545
                LD X,A+DATA_V
546
                LD Y,A+DATA_V2
547
                ADD X,X,Y           ; first word of the pair
548
 
549
                JNC 0+VADD_QW_NC    ; save carry
550
                ST 0+AKKU,A
551
                OR A,0,1
552
                ST 0+CARRY,A        ; CARRY = 1
553
                LD A,0+AKKU
554
 
555
  VADD_QW_NC:   LD Y,A+V_RES
556
                SUB 0,X,Y
557
                JNZ 0+ERR_VALU
558
                INC A,A
559
 
560
                LD X,A+DATA_V
561
                LD Y,A+DATA_V2
562
                ADD X,X,Y           ; second word of the pair ...
563
                LD Y,0+CARRY
564
                ADD X,X,Y           ; ... plus the carry saved from the first word
565
                LD Y,A+V_RES
566
 
567
                SUB 0,X,Y
568
                JNZ 0+ERR_VALU
569
                INC A,A
570
 
571
                SUB 0,A,k           ; A advances by 2 per pass; the loop ends when A equals k
572
                JNZ 0+VADD_LOOP_QW
573
 
574
                ;16bit
575
                VADD.W R2,R0,R1
576
                OR A,0,V_RES
577
                VST 0+A,R2
578
 
579
                OR A,0,0
580
  VADD_LOOP_W:  LD X,A+DATA_V           ;low word
581
                LD Y,A+DATA_V2
582
                ADD X,X,Y
583
                LD Y,A+V_RES
584
                AND X,X,$FFFF           ; compare only the low 16 bits
585
                AND Y,Y,$FFFF
586
                SUB 0,X,Y
587
                JNZ 0+ERR_VALU
588
 
589
                LD X,A+DATA_V           ;high word
590
                LD Y,0+MASK_HW
591
                AND X,X,Y               ; keep only the high word of operand 1
592
 
593
                LD Y,A+DATA_V2
594
                ST 0+AKKU,A             ; save the index: A is needed for the mask
595
                LD A,0+MASK_HW
596
 
597
                AND Y,Y,A               ; keep only the high word of operand 2
598
 
599
                LD A,0+AKKU
600
 
601
                ADD X,X,Y
602
                LD Y,A+V_RES
603
 
604
                ST 0+AKKU,A
605
                OR A,0,0                ; A = shift counter
606
 
607
  VADD_LOOP_W2: LSR X,X                 ; shift reference and result right 16 times
608
                LSR Y,Y
609
                INC A,A
610
                SUB 0,A,16
611
                JNZ 0+VADD_LOOP_W2
612
 
613
                LD A,0+AKKU
614
                AND X,X,$FFFF
615
                AND Y,Y,$FFFF
616
                SUB 0,X,Y
617
                JNZ 0+ERR_VALU
618
 
619
                INC A,A
620
                SUB 0,A,k
621
                JNZ 0+VADD_LOOP_W
622
 
623
                ;8 bit
624
                VADD.B R2,R0,R1
625
                OR A,0,V_RES
626
                VST 0+A,R2
627
 
628
                OR A,0,0
629
 VADD_LOOP_B:   OR X,A,0            ; X = word index
630
                ST 0+AKKU,A         ; save the word index
631
                LD A,X+DATA_V
632
                ST 0+A_REG,A        ; A_REG = operand 1 word
633
                LD A,X+DATA_V2
634
                ST 0+B_REG,A        ; B_REG = operand 2 word
635
                LD A,X+V_RES
636
                ST 0+RES_REG,A      ; RES_REG = vector result word
637
                OR A,0,0            ; A = byte counter
638
 
639
 VADD_LOOP_B2:  ST 0+I,A
640
 
641
                LD X,0+A_REG
642
                LD Y,0+B_REG
643
                LD A,0+RES_REG
644
 
645
                ADD X,X,Y
646
                AND X,X,$FF         ; compare only the lowest byte
647
                AND A,A,$FF
648
 
649
                SUB 0,X,A
650
                JNZ 0+ERR_VALU
651
 
652
                LD X,0+A_REG
653
                LD Y,0+B_REG
654
                LD A,0+RES_REG
655
 
656
                LSR X,X             ; 8 shifts: move the next byte into the lowest position
657
                LSR Y,Y
658
                LSR A,A
659
 
660
                LSR X,X
661
                LSR Y,Y
662
                LSR A,A
663
 
664
                LSR X,X
665
                LSR Y,Y
666
                LSR A,A
667
 
668
                LSR X,X
669
                LSR Y,Y
670
                LSR A,A
671
 
672
                LSR X,X
673
                LSR Y,Y
674
                LSR A,A
675
 
676
                LSR X,X
677
                LSR Y,Y
678
                LSR A,A
679
 
680
                LSR X,X
681
                LSR Y,Y
682
                LSR A,A
683
 
684
                LSR X,X
685
                LSR Y,Y
686
                LSR A,A
687
 
688
                ST 0+RES_REG,A
689
                OR A,0,X
690
                ST 0+A_REG,A
691
                OR A,0,Y
692
                ST 0+B_REG,A
693
 
694
                LD A,0+I
695
                INC A,A
696
                SUB 0,A,4           ; 4 bytes per word
697
                JNZ 0+VADD_LOOP_B2
698
 
699
                LD A,0+AKKU
700
                INC A,A
701
                SUB 0,A,k
702
                JNZ 0+VADD_LOOP_B
703
 
704
TEST_VSUB:      VSUB.DW R2,R0,R1
705
                OR A,0,V_RES
706
                VST 0+A,R2          ; write the vector result to V_RES
707
 
708
                ;32 bit
709
                OR A,0,0
710
 VSUB_LOOP_DW:  LD X,A+DATA_V
711
                LD Y,A+DATA_V2
712
                SUB X,X,Y           ; scalar reference for element A
713
                LD Y,A+V_RES
714
                SUB 0,X,Y
715
                JNZ 0+ERR_VALU
716
                INC A,A
717
                SUB 0,A,k
718
                JNZ 0+VSUB_LOOP_DW
719
 
720
                ;64 bit
721
                VSUB.QW R2,R0,R1
722
                OR A,0,V_RES
723
                VST 0+A,R2
724
 
725
                OR A,0,0
726
 VSUB_LOOP_QW:  ST 0+AKKU,A         ;reset carry
727
                OR A,0,0
728
                ST 0+CARRY,A        ; CARRY = 0
729
                LD A,0+AKKU         ; restore the word index saved in AKKU
730
 
731
                LD X,A+DATA_V
732
                LD Y,A+DATA_V2
733
                SUB X,X,Y           ; first word of the pair
734
 
735
                JNC 0+VSUB_QW_NC    ; save carry
736
                ST 0+AKKU,A
737
                OR A,0,1
738
                ST 0+CARRY,A        ; CARRY = 1
739
                LD A,0+AKKU
740
 
741
  VSUB_QW_NC:   LD Y,A+V_RES
742
                SUB 0,X,Y
743
                JNZ 0+ERR_VALU
744
                INC A,A
745
 
746
                LD X,A+DATA_V
747
                LD Y,A+DATA_V2
748
                SUB X,X,Y           ; second word of the pair ...
749
                LD Y,0+CARRY
750
                SUB X,X,Y           ; ... minus the carry saved from the first word
751
                LD Y,A+V_RES
752
 
753
                SUB 0,X,Y
754
                JNZ 0+ERR_VALU
755
                INC A,A
756
 
757
                SUB 0,A,k           ; A advances by 2 per pass; the loop ends when A equals k
758
                JNZ 0+VSUB_LOOP_QW
759
 
760
                ;16bit
761
                VSUB.W R2,R0,R1
762
                OR A,0,V_RES
763
                VST 0+A,R2
764
 
765
                OR A,0,0
766
  VSUB_LOOP_W:  LD X,A+DATA_V           ;low word
767
                LD Y,A+DATA_V2
768
                SUB X,X,Y
769
                LD Y,A+V_RES
770
                AND X,X,$FFFF           ; compare only the low 16 bits
771
                AND Y,Y,$FFFF
772
                SUB 0,X,Y
773
                JNZ 0+ERR_VALU
774
 
775
                LD X,A+DATA_V           ;high word
776
                LD Y,0+MASK_HW
777
                AND X,X,Y               ; keep only the high word of operand 1
778
 
779
                LD Y,A+DATA_V2
780
                ST 0+AKKU,A             ; save the index: A is needed for the mask
781
                LD A,0+MASK_HW
782
 
783
                AND Y,Y,A               ; keep only the high word of operand 2
784
 
785
                LD A,0+AKKU
786
 
787
                SUB X,X,Y
788
                LD Y,A+V_RES
789
 
790
                ST 0+AKKU,A
791
                OR A,0,0                ; A = shift counter
792
 
793
  VSUB_LOOP_W2: LSR X,X                 ; shift reference and result right 16 times
794
                LSR Y,Y
795
                INC A,A
796
                SUB 0,A,16
797
                JNZ 0+VSUB_LOOP_W2
798
 
799
                LD A,0+AKKU
800
                AND X,X,$FFFF
801
                AND Y,Y,$FFFF
802
                SUB 0,X,Y
803
                JNZ 0+ERR_VALU
804
 
805
                INC A,A
806
                SUB 0,A,k
807
                JNZ 0+VSUB_LOOP_W
808
 
809
                ;8 bit
810
                VSUB.B R2,R0,R1
811
                OR A,0,V_RES
812
                VST 0+A,R2
813
 
814
                OR A,0,0
815
 VSUB_LOOP_B:   OR X,A,0            ; X = word index
816
                ST 0+AKKU,A         ; save the word index
817
                LD A,X+DATA_V
818
                ST 0+A_REG,A        ; A_REG = operand 1 word
819
                LD A,X+DATA_V2
820
                ST 0+B_REG,A        ; B_REG = operand 2 word
821
                LD A,X+V_RES
822
                ST 0+RES_REG,A      ; RES_REG = vector result word
823
                OR A,0,0            ; A = byte counter
824
 
825
 VSUB_LOOP_B2:  ST 0+I,A
826
 
827
                LD X,0+A_REG
828
                LD Y,0+B_REG
829
                LD A,0+RES_REG
830
 
831
                SUB X,X,Y
832
                AND X,X,$FF         ; compare only the lowest byte
833
                AND A,A,$FF
834
 
835
                SUB 0,X,A
836
                JNZ 0+ERR_VALU
837
 
838
                LD X,0+A_REG
839
                LD Y,0+B_REG
840
                LD A,0+RES_REG
841
 
842
                LSR X,X             ; 8 shifts: move the next byte into the lowest position
843
                LSR Y,Y
844
                LSR A,A
845
 
846
                LSR X,X
847
                LSR Y,Y
848
                LSR A,A
849
 
850
                LSR X,X
851
                LSR Y,Y
852
                LSR A,A
853
 
854
                LSR X,X
855
                LSR Y,Y
856
                LSR A,A
857
 
858
                LSR X,X
859
                LSR Y,Y
860
                LSR A,A
861
 
862
                LSR X,X
863
                LSR Y,Y
864
                LSR A,A
865
 
866
                LSR X,X
867
                LSR Y,Y
868
                LSR A,A
869
 
870
                LSR X,X
871
                LSR Y,Y
872
                LSR A,A
873
 
874
                ST 0+RES_REG,A
875
                OR A,0,X
876
                ST 0+A_REG,A
877
                OR A,0,Y
878
                ST 0+B_REG,A
879
 
880
                LD A,0+I
881
                INC A,A
882
                SUB 0,A,4           ; 4 bytes per word
883
                JNZ 0+VSUB_LOOP_B2
884
 
885
                LD A,0+AKKU
886
                INC A,A
887
                SUB 0,A,k
888
                JNZ 0+VSUB_LOOP_B
889
 
890
TEST_VLSL:      VLSL.DW R2,R0
891
                OR A,0,V_RES
892
                VST 0+A,R2          ; write the vector result to V_RES
893
 
894
                ;32 bit
895
                OR A,0,0
896
 VLSL_LOOP_DW:  LD X,A+DATA_V
897
                LSL X,X             ; scalar reference for element A
898
                LD Y,A+V_RES
899
                SUB 0,X,Y
900
                JNZ 0+ERR_VALU
901
                INC A,A
902
                SUB 0,A,k
903
                JNZ 0+VLSL_LOOP_DW
904
 
905
                ;64 bit
906
                VLSL.QW R2,R0
907
                OR A,0,V_RES
908
                VST 0+A,R2
909
 
910
                OR A,0,0
911
 VLSL_LOOP_QW:  ST 0+AKKU,A         ;reset carry
912
                OR A,0,0
913
                ST 0+CARRY,A        ; CARRY = 0
914
                LD A,0+AKKU         ; restore the word index saved in AKKU
915
 
916
                LD X,A+DATA_V
917
                LSL X,X             ; first word of the pair
918
 
919
                JNC 0+VLSL_QW_NC    ; save carry
920
                ST 0+AKKU,A
921
                OR A,0,1
922
                ST 0+CARRY,A        ; CARRY = 1
923
                LD A,0+AKKU
924
 
925
  VLSL_QW_NC:   LD Y,A+V_RES
926
                SUB 0,X,Y
927
                JNZ 0+ERR_VALU
928
                INC A,A
929
 
930
                LD X,A+DATA_V
931
                LD Y,0+CARRY
932
                LSR Y,Y             ; move the saved carry bit back into the carry flag
933
                ROL X,X             ; second word: rotate the saved carry in
934
 
935
                LD Y,A+V_RES
936
 
937
                SUB 0,X,Y
938
                JNZ 0+ERR_VALU
939
                INC A,A
940
 
941
                SUB 0,A,k           ; A advances by 2 per pass; the loop ends when A equals k
942
                JNZ 0+VLSL_LOOP_QW
943
 
944
                ;16bit
945
                VLSL.W R2,R0
946
                OR A,0,V_RES
947
                VST 0+A,R2
948
 
949
                OR A,0,0
950
  VLSL_LOOP_W:  LD X,A+DATA_V           ;low word
951
                LSL X,X
952
                LD Y,A+V_RES
953
                AND X,X,$FFFF           ; compare only the low 16 bits
954
                AND Y,Y,$FFFF
955
                SUB 0,X,Y
956
                JNZ 0+ERR_VALU
957
 
958
                LD X,A+DATA_V           ;high word
959
                LD Y,0+MASK_HW
960
                AND X,X,Y               ; keep only the high word before shifting
961
 
962
                LSL X,X
963
                LD Y,A+V_RES
964
 
965
                ST 0+AKKU,A             ; save the word index
966
                OR A,0,0                ; A = shift counter
967
 
968
  VLSL_LOOP_W2: LSR X,X                 ; shift reference and result right 16 times
969
                LSR Y,Y
970
                INC A,A
971
                SUB 0,A,16
972
                JNZ 0+VLSL_LOOP_W2
973
 
974
                LD A,0+AKKU
975
                AND X,X,$FFFF
976
                AND Y,Y,$FFFF
977
                SUB 0,X,Y
978
                JNZ 0+ERR_VALU
979
 
980
                INC A,A
981
                SUB 0,A,k
982
                JNZ 0+VLSL_LOOP_W
983
 
984
                ;8 bit
985
                VLSL.B R2,R0
986
                OR A,0,V_RES
987
                VST 0+A,R2
988
 
989
                OR A,0,0
990
 VLSL_LOOP_B:   OR X,A,0            ; X = word index
991
                ST 0+AKKU,A         ; save the word index
992
                LD A,X+DATA_V
993
                ST 0+A_REG,A        ; A_REG = operand word
994
                LD A,X+V_RES
995
                ST 0+RES_REG,A      ; RES_REG = vector result word
996
                OR A,0,0            ; A = byte counter
997
 
998
 VLSL_LOOP_B2:  ST 0+I,A
999
 
1000
                LD X,0+A_REG
1001
 
1002
                LD A,0+RES_REG
1003
 
1004
                LSL X,X
1005
                AND X,X,$FF         ; compare only the lowest byte
1006
                AND A,A,$FF
1007
 
1008
                SUB 0,X,A
1009
                JNZ 0+ERR_VALU
1010
 
1011
                LD X,0+A_REG
1012
                LD A,0+RES_REG
1013
 
1014
                LSR X,X             ; 8 shifts: move the next byte into the lowest position
1015
                LSR A,A
1016
 
1017
                LSR X,X
1018
                LSR A,A
1019
 
1020
                LSR X,X
1021
                LSR A,A
1022
 
1023
                LSR X,X
1024
                LSR A,A
1025
 
1026
                LSR X,X
1027
                LSR A,A
1028
 
1029
                LSR X,X
1030
                LSR A,A
1031
 
1032
                LSR X,X
1033
                LSR A,A
1034
 
1035
                LSR X,X
1036
                LSR A,A
1037
 
1038
                ST 0+RES_REG,A
1039
                OR A,0,X
1040
                ST 0+A_REG,A
1041
 
1042
                LD A,0+I
1043
                INC A,A
1044
                SUB 0,A,4           ; 4 bytes per word
1045
                JNZ 0+VLSL_LOOP_B2
1046
 
1047
                LD A,0+AKKU
1048
                INC A,A
1049
                SUB 0,A,k
1050
                JNZ 0+VLSL_LOOP_B
1051
 
1052
TEST_VLSR:      VLSR.DW R2,R0
1053
                OR A,0,V_RES
1054
                VST 0+A,R2
1055
 
1056
                ;32 bit
1057
                OR A,0,0
1058
 VLSR_LOOP_DW:  LD X,A+DATA_V
1059
                LSR X,X
1060
                LD Y,A+V_RES
1061
                SUB 0,X,Y
1062
                JNZ 0+ERR_VALU
1063
                INC A,A
1064
                SUB 0,A,k
1065
                JNZ 0+VLSR_LOOP_DW
1066
 
1067
                ;64 bit: two consecutive words form one element. The word at the
                ;odd index (A+1) is shifted first; the bit it shifts out is kept
                ;in CARRY and must show up as the top bit of the word at index A.
1068
                VLSR.QW R2,R0
1069
                OR A,0,V_RES            ; A = address of the result buffer
1070
                VST 0+A,R2
1071
 
1072
                OR A,0,0                ; A = index of the first word of the pair
1073
 VLSR_LOOP_QW:  ST 0+AKKU,A         ;reset carry (A is parked in AKKU meanwhile)
1074
                OR A,0,0
1075
                ST 0+CARRY,A
1076
                LD A,0+AKKU
1077
 
1078
                INC A,A                 ; A+1: second word of the pair
1079
                LD X,A+DATA_V
1080
                LSR X,X                 ; expected value of the second word
1081
 
1082
                JNC 0+VLSR_QW_NC    ; save carry: CARRY = 1 if a bit was shifted out
1083
                ST 0+AKKU,A
1084
                OR A,0,1
1085
                ST 0+CARRY,A
1086
                LD A,0+AKKU
1087
 
1088
  VLSR_QW_NC:   LD Y,A+V_RES
1089
                SUB 0,X,Y
1090
                JNZ 0+ERR_VALU
1091
                DEC A,A                 ; back to the first word of the pair
1092
 
1093
                LD X,A+DATA_V
1094
                LD Y,0+CARRY
1095
                LSR Y,Y                 ; move the saved bit back into the carry flag
1096
                ROR X,X                 ; presumably rotates the carry into the top bit -- TODO confirm
1097
 
1098
                LD Y,A+V_RES
1099
 
1100
                SUB 0,X,Y
1101
                JNZ 0+ERR_VALU
1102
                INC A,A                 ; advance by two words = one 64 bit element
1103
                INC A,A
1104
 
1105
                SUB 0,A,k               ; note: terminates only if k is even
1106
                JNZ 0+VLSR_LOOP_QW
1107
 
1108
                ;16 bit: each word holds two elements. Both halves are checked
                ;separately so that no bit may cross the 16 bit boundary.
1109
                VLSR.W R2,R0
1110
                OR A,0,V_RES            ; A = address of the result buffer
1111
                VST 0+A,R2
1112
 
1113
                OR A,0,0                ; A = word index
1114
  VLSR_LOOP_W:  LD X,A+DATA_V           ;low word: isolate bits 15..0, then shift
1115
                AND X,X,$FFFF
1116
                LSR X,X
1117
                LD Y,A+V_RES
1118
                AND Y,Y,$FFFF
1119
 
1120
                SUB 0,X,Y
1121
                JNZ 0+ERR_VALU
1122
 
1123
                LD X,A+DATA_V           ;high word: isolate it with MASK_HW
1124
                LD Y,0+MASK_HW          ; MASK_HW is defined elsewhere (presumably $FFFF0000 -- TODO confirm)
1125
                AND X,X,Y
1126
 
1127
                LSR X,X                 ; expected value, still in the upper half
1128
                LD Y,A+V_RES
1129
 
1130
                ST 0+AKKU,A             ; park the word index, A becomes a shift counter
1131
                OR A,0,0
1132
 
1133
  VLSR_LOOP_W2: LSR X,X                 ; shift expected and actual value down by 16 bits
1134
                LSR Y,Y
1135
                INC A,A
1136
                SUB 0,A,16
1137
                JNZ 0+VLSR_LOOP_W2
1138
 
1139
                LD A,0+AKKU             ; restore the word index
1140
                AND X,X,$FFFF
1141
                AND Y,Y,$FFFF
1142
                SUB 0,X,Y
1143
                JNZ 0+ERR_VALU
1144
 
1145
                INC A,A
1146
                SUB 0,A,k               ; loop until k words have been checked
1147
                JNZ 0+VLSR_LOOP_W
1148
 
1149
                ;8 bit: each word holds four elements. The outer loop (index in
                ;AKKU) walks the k words; the inner loop (counter in I) checks the
                ;lowest byte and then moves the next byte down, four times per word.
                ;A_REG holds the remaining input bits, RES_REG the remaining result bits.
1150
                VLSR.B R2,R0
1151
                OR A,0,V_RES            ; A = address of the result buffer
1152
                VST 0+A,R2
1153
 
1154
                OR A,0,0                ; A = word index
1155
 VLSR_LOOP_B:   OR X,A,0                ; X = word index, A is needed as data register
1156
                ST 0+AKKU,A
1157
                LD A,X+DATA_V
1158
                ST 0+A_REG,A            ; A_REG = input word
1159
                LD A,X+V_RES
1160
                ST 0+RES_REG,A          ; RES_REG = result word of the vector unit
1161
                OR A,0,0                ; byte counter starts at 0
1162
 
1163
 VLSR_LOOP_B2:  ST 0+I,A                ; I = byte counter
1164
 
1165
                LD X,0+A_REG
1166
                LD A,0+RES_REG
1167
 
1168
                AND X,X,$FF             ; expected: lowest input byte shifted right by one
1169
                LSR X,X
1170
                AND A,A,$FF             ; actual: lowest result byte
1171
 
1172
                SUB 0,X,A
1173
                JNZ 0+ERR_VALU
1174
 
1175
                LD X,0+A_REG
1176
                LD A,0+RES_REG
1177
 
1178
                LSR X,X                 ; eight shift pairs: drop the byte just checked
1179
                LSR A,A
1180
 
1181
                LSR X,X
1182
                LSR A,A
1183
 
1184
                LSR X,X
1185
                LSR A,A
1186
 
1187
                LSR X,X
1188
                LSR A,A
1189
 
1190
                LSR X,X
1191
                LSR A,A
1192
 
1193
                LSR X,X
1194
                LSR A,A
1195
 
1196
                LSR X,X
1197
                LSR A,A
1198
 
1199
                LSR X,X
1200
                LSR A,A
1201
 
1202
                ST 0+RES_REG,A          ; keep the remaining result bytes
1203
                OR A,0,X
1204
                ST 0+A_REG,A            ; keep the remaining input bytes
1205
 
1206
                LD A,0+I
1207
                INC A,A
1208
                SUB 0,A,4               ; four bytes per word
1209
                JNZ 0+VLSR_LOOP_B2
1210
 
1211
                LD A,0+AKKU             ; restore the word index
1212
                INC A,A
1213
                SUB 0,A,k               ; loop until k words have been checked
1214
                JNZ 0+VLSR_LOOP_B
1215
 
1216
 
1217
                ;vector and scalar commands simultaneous
                ;A vector instruction is executed once alone (result in R2) and once
                ;paired with a scalar instruction via "|" (result in R3). R3 - R2 must
                ;be zero in every word and the scalar result must match the stored
                ;reference value. Any deviation jumps to ERR_SIMUL.
1218
TEST_SIMUL:     OR A,0,DATA_V
1219
                VLD R0,0+A              ; R0 = vector at DATA_V
1220
                OR A,0,DATA_V2
1221
                VLD R1,0+A              ; R1 = vector at DATA_V2
1222
 
1223
                LD X,0+DATA_A           ; scalar operands
1224
                LD Y,0+DATA_B
1225
                OR A,0,0
1226
                VADD.DW R2,R0,R1        ; reference: vector add on its own
1227
                VADD.DW R3,R0,R1 | ADD A,X,Y    ; same vector add paired with a scalar add
1228
                VSUB.DW R3,R3,R2        ; difference of both results
1229
 
1230
                OR Y,0,V_RES
1231
                VST 0+Y,R3              ; store the difference, Y is used so that A survives
1232
 
1233
                LD X,0+ADD_RES          ; expected scalar sum, defined elsewhere
1234
                SUB 0,X,A
1235
                JNZ 0+ERR_SIMUL
1236
 
1237
                OR A,0,0                ; A = word index
1238
 SIMUL_LOOP1:   LD X,A+V_RES            ; every word of the difference must be 0
1239
                JNZ 0+ERR_SIMUL         ; NOTE(review): relies on LD updating the zero flag -- confirm
1240
                INC A,A
1241
                SUB 0,A,k
1242
                JNZ 0+SIMUL_LOOP1
1243
 
1244
                ; Second simultaneous test: vector XOR paired with a scalar SUB.
                ; Uses R0/R1 as loaded at TEST_SIMUL. The paired result (R3) minus
                ; the stand-alone result (R2) must be zero in every word, and the
                ; scalar difference must equal SUB_RES.
                LD X,0+DATA_A
1245
                LD Y,0+DATA_B
1246
                OR A,0,0
1247
                VXOR.DW R2,R0,R1        ; reference: vector xor on its own
1248
                VXOR.DW R3,R0,R1 | SUB A,X,Y    ; same vector xor paired with a scalar subtract
1249
                VSUB.DW R3,R3,R2        ; difference of both results
1250
 
1251
                OR Y,0,V_RES
1252
                VST 0+Y,R3              ; store the difference, Y is used so that A survives
1253
 
1254
                LD X,0+SUB_RES          ; expected scalar difference, defined elsewhere
1255
                SUB 0,X,A
1256
                JNZ 0+ERR_SIMUL
1257
 
1258
                OR A,0,0                ; A = word index
1259
 SIMUL_LOOP2:   LD X,A+V_RES            ; every word of the difference must be 0
1260
                JNZ 0+ERR_SIMUL         ; NOTE(review): relies on LD updating the zero flag -- confirm
1261
                INC A,A
1262
                SUB 0,A,k
1263
                JNZ 0+SIMUL_LOOP2
1264
 
1265
 
1266
                ; Third simultaneous test: vector move paired with a scalar AND.
                ; Uses R0 as loaded at TEST_SIMUL. The paired copy (R3) minus the
                ; stand-alone copy (R2) must be zero in every word, and the scalar
                ; result must equal AND_RES.
                LD X,0+DATA_A
1267
                LD Y,0+DATA_B
1268
                OR A,0,0
1269
                VMOV R2,R0              ; reference: vector move on its own
1270
                VMOV R3,R0 | AND A,X,Y  ; same vector move paired with a scalar and
1271
                VSUB.DW R3,R3,R2        ; difference of both results
1272
 
1273
                OR Y,0,V_RES
1274
                VST 0+Y,R3              ; store the difference, Y is used so that A survives
1275
 
1276
                LD X,0+AND_RES          ; expected scalar result, defined elsewhere
1277
                SUB 0,X,A
1278
                JNZ 0+ERR_SIMUL
1279
 
1280
                OR A,0,0                ; A = word index
1281
 SIMUL_LOOP3:   LD X,A+V_RES            ; every word of the difference must be 0
1282
                JNZ 0+ERR_SIMUL         ; NOTE(review): relies on LD updating the zero flag -- confirm
1283
                INC A,A
1284
                SUB 0,A,k
1285
                JNZ 0+SIMUL_LOOP3
1286
 
1287
; TEST_VSHUF: tests the shuffle instruction (VSHUF). The whole test is skipped
; (jump to NO_SHUFFLE) when the configuration constant use_shuffle is 0.
; R0 is loaded from DATA_V and R1 from DATA_V2; every sub-test stores its
; result to V_RES and compares selected words. A mismatch jumps to ERR_VSHUF.
TEST_VSHUF:     OR A,0,use_shuffle      ; A = 0 sets the zero flag -> skip
1288
                JZ 0+NO_SHUFFLE
1289
                OR A,0,DATA_V
1290
                VLD R0,0+A
1291
                OR A,0,DATA_V2
1292
                VLD R1,0+A
1293
 
1294
 TEST_VSHUF1:   VSHUF R2,R0,R1,00101000011011 ;vwidth + ssss + vn (bit fields of the pattern)
1295
                OR A,0,V_RES
1296
                VST 0+A,R2
1297
 
1298
                LD X,A+0                ; X = first result word (A still points to V_RES)
1299
                LD A,0+RES_VSHUF_8L     ; expected value, defined elsewhere
1300
                SUB 0,A,X
1301
                JNZ 0+ERR_VSHUF
1302
 
1303
                OR A,0,V_RES
1304
                LD Y,A+1                ; Y = second result word
1305
                LD A,0+RES_VSHUF_8H     ; expected value, defined elsewhere
1306
                SUB 0,A,Y
1307
                JNZ 0+ERR_VSHUF
1308
 
1309
 ; TEST_VSHUF2: second shuffle pattern. Result words 0..3 in V_RES are
 ; compared with fixed words of DATA_V. Each group loads V_RES[i] into X
 ; and DATA_V[j] into Y and requires both to be equal.
 TEST_VSHUF2:   VSHUF R2,R0,R1,01110010110001 ;vwidth + ssss + vn (bit fields of the pattern)
1310
                OR A,0,V_RES
1311
                VST 0+A,R2
1312
 
1313
                OR A,0,0                ; V_RES[0] must equal DATA_V[1]
1314
                LD X,A+V_RES
1315
                OR A,0,1
1316
                LD Y,A+DATA_V
1317
                SUB 0,X,Y
1318
                JNZ 0+ERR_VSHUF
1319
 
1320
                OR A,0,1                ; V_RES[1] must equal DATA_V[0]
1321
                LD X,A+V_RES
1322
                OR A,0,0
1323
                LD Y,A+DATA_V
1324
                SUB 0,X,Y
1325
                JNZ 0+ERR_VSHUF
1326
 
1327
                OR A,0,2                ; V_RES[2] must equal DATA_V[4]
1328
                LD X,A+V_RES
1329
                OR A,0,4
1330
                LD Y,A+DATA_V
1331
                SUB 0,X,Y
1332
                JNZ 0+ERR_VSHUF
1333
 
1334
                OR A,0,3                ; V_RES[3] must equal DATA_V[3]
1335
                LD X,A+V_RES
1336
                OR A,0,3
1337
                LD Y,A+DATA_V
1338
                SUB 0,X,Y
1339
                JNZ 0+ERR_VSHUF
1340
 
1341
 ; TEST_VSHUF3: third shuffle pattern. Result words 0..7 in V_RES are
 ; compared with fixed words of DATA_V. Each group loads V_RES[i] into X
 ; and DATA_V[j] into Y and requires both to be equal.
 TEST_VSHUF3:   VSHUF R2,R0,R1,10001101110010 ;vwidth + ssss + vn (bit fields of the pattern)
1342
                OR A,0,V_RES
1343
                VST 0+A,R2
1344
 
1345
                OR A,0,0                ; V_RES[0] must equal DATA_V[5]
1346
                LD X,A+V_RES
1347
                OR A,0,5
1348
                LD Y,A+DATA_V
1349
                SUB 0,X,Y
1350
                JNZ 0+ERR_VSHUF
1351
 
1352
                OR A,0,1                ; V_RES[1] must equal DATA_V[6]
1353
                LD X,A+V_RES
1354
                OR A,0,6
1355
                LD Y,A+DATA_V
1356
                SUB 0,X,Y
1357
                JNZ 0+ERR_VSHUF
1358
 
1359
                OR A,0,2                ; V_RES[2] must equal DATA_V[1]
1360
                LD X,A+V_RES
1361
                OR A,0,1
1362
                LD Y,A+DATA_V
1363
                SUB 0,X,Y
1364
                JNZ 0+ERR_VSHUF
1365
 
1366
                OR A,0,3                ; V_RES[3] must equal DATA_V[2]
1367
                LD X,A+V_RES
1368
                OR A,0,2
1369
                LD Y,A+DATA_V
1370
                SUB 0,X,Y
1371
                JNZ 0+ERR_VSHUF
1372
 
1373
                OR A,0,4                ; V_RES[4] must equal DATA_V[6]
1374
                LD X,A+V_RES
1375
                OR A,0,6
1376
                LD Y,A+DATA_V
1377
                SUB 0,X,Y
1378
                JNZ 0+ERR_VSHUF
1379
 
1380
                OR A,0,5                ; V_RES[5] must equal DATA_V[7]
1381
                LD X,A+V_RES
1382
                OR A,0,7
1383
                LD Y,A+DATA_V
1384
                SUB 0,X,Y
1385
                JNZ 0+ERR_VSHUF
1386
 
1387
                OR A,0,6                ; V_RES[6] must equal DATA_V[2]
1388
                LD X,A+V_RES
1389
                OR A,0,2
1390
                LD Y,A+DATA_V
1391
                SUB 0,X,Y
1392
                JNZ 0+ERR_VSHUF
1393
 
1394
                OR A,0,7                ; V_RES[7] must equal DATA_V[3]
1395
                LD X,A+V_RES
1396
                OR A,0,3
1397
                LD Y,A+DATA_V
1398
                SUB 0,X,Y
1399
                JNZ 0+ERR_VSHUF
1400
 
1401
 ; TEST_VSHUF4: fourth shuffle pattern. Result words 0..15 in V_RES are
 ; compared with fixed words of DATA_V, in four groups of four words. Each
 ; check loads V_RES[i] into X and DATA_V[j] into Y and requires equality.
 ; NO_SHUFFLE at the end is the landing point used when the shuffle test is
 ; skipped.
 TEST_VSHUF4:   VSHUF R2,R0,R1,11010100100111 ;vwidth + ssss + vn (bit fields of the pattern)
1402
                OR A,0,V_RES
1403
                VST 0+A,R2
1404
 
1405
                OR A,0,0                ; V_RES[0] must equal DATA_V[13]
1406
                LD X,A+V_RES
1407
                OR A,0,13
1408
                LD Y,A+DATA_V
1409
                SUB 0,X,Y
1410
                JNZ 0+ERR_VSHUF
1411
 
1412
                OR A,0,1                ; V_RES[1] must equal DATA_V[14]
1413
                LD X,A+V_RES
1414
                OR A,0,14
1415
                LD Y,A+DATA_V
1416
                SUB 0,X,Y
1417
                JNZ 0+ERR_VSHUF
1418
 
1419
                OR A,0,2                ; V_RES[2] must equal DATA_V[15]
1420
                LD X,A+V_RES
1421
                OR A,0,15
1422
                LD Y,A+DATA_V
1423
                SUB 0,X,Y
1424
                JNZ 0+ERR_VSHUF
1425
 
1426
                OR A,0,3                ; V_RES[3] must equal DATA_V[16]
1427
                LD X,A+V_RES
1428
                OR A,0,16               ; NOTE(review): offset 16 lies past word 15, the last result word checked here -- confirm this index is intended
1429
                LD Y,A+DATA_V
1430
                SUB 0,X,Y
1431
                JNZ 0+ERR_VSHUF
1432
 
1433
                ;-------------- words 4..7: expected unchanged
1434
 
1435
                OR A,0,4                ; V_RES[4] must equal DATA_V[4]
1436
                LD X,A+V_RES
1437
                OR A,0,4
1438
                LD Y,A+DATA_V
1439
                SUB 0,X,Y
1440
                JNZ 0+ERR_VSHUF
1441
 
1442
                OR A,0,5                ; V_RES[5] must equal DATA_V[5]
1443
                LD X,A+V_RES
1444
                OR A,0,5
1445
                LD Y,A+DATA_V
1446
                SUB 0,X,Y
1447
                JNZ 0+ERR_VSHUF
1448
 
1449
                OR A,0,6                ; V_RES[6] must equal DATA_V[6]
1450
                LD X,A+V_RES
1451
                OR A,0,6
1452
                LD Y,A+DATA_V
1453
                SUB 0,X,Y
1454
                JNZ 0+ERR_VSHUF
1455
 
1456
                OR A,0,7                ; V_RES[7] must equal DATA_V[7]
1457
                LD X,A+V_RES
1458
                OR A,0,7
1459
                LD Y,A+DATA_V
1460
                SUB 0,X,Y
1461
                JNZ 0+ERR_VSHUF
1462
 
1463
                ;-------------- words 8..11
1464
 
1465
                OR A,0,8                ; V_RES[8] must equal DATA_V[9]
1466
                LD X,A+V_RES
1467
                OR A,0,9
1468
                LD Y,A+DATA_V
1469
                SUB 0,X,Y
1470
                JNZ 0+ERR_VSHUF
1471
 
1472
                OR A,0,9                ; V_RES[9] must equal DATA_V[10]
1473
                LD X,A+V_RES
1474
                OR A,0,10
1475
                LD Y,A+DATA_V
1476
                SUB 0,X,Y
1477
                JNZ 0+ERR_VSHUF
1478
 
1479
                OR A,0,10               ; V_RES[10] must equal DATA_V[11]
1480
                LD X,A+V_RES
1481
                OR A,0,11
1482
                LD Y,A+DATA_V
1483
                SUB 0,X,Y
1484
                JNZ 0+ERR_VSHUF
1485
 
1486
                OR A,0,11               ; V_RES[11] must equal DATA_V[12]
1487
                LD X,A+V_RES
1488
                OR A,0,12
1489
                LD Y,A+DATA_V
1490
                SUB 0,X,Y
1491
                JNZ 0+ERR_VSHUF
1492
 
1493
                ;-------------- words 12..15
1494
 
1495
                OR A,0,12               ; V_RES[12] must equal DATA_V[0]
1496
                LD X,A+V_RES
1497
                OR A,0,0
1498
                LD Y,A+DATA_V
1499
                SUB 0,X,Y
1500
                JNZ 0+ERR_VSHUF
1501
 
1502
                OR A,0,13               ; V_RES[13] must equal DATA_V[1]
1503
                LD X,A+V_RES
1504
                OR A,0,1
1505
                LD Y,A+DATA_V
1506
                SUB 0,X,Y
1507
                JNZ 0+ERR_VSHUF
1508
 
1509
                OR A,0,14               ; V_RES[14] must equal DATA_V[2]
1510
                LD X,A+V_RES
1511
                OR A,0,2
1512
                LD Y,A+DATA_V
1513
                SUB 0,X,Y
1514
                JNZ 0+ERR_VSHUF
1515
 
1516
                OR A,0,15               ; V_RES[15] must equal DATA_V[3]
1517
                LD X,A+V_RES
1518
                OR A,0,3
1519
                LD Y,A+DATA_V
1520
                SUB 0,X,Y
1521
                JNZ 0+ERR_VSHUF
1522
 
1523
 NO_SHUFFLE:     NOP                    ; jump target when use_shuffle is 0
1524
 
1525
; TEST_VMUL: tests the vector multiply instruction (VMUL) against the scalar
; MUL instruction. The whole test is skipped (jump to NO_VMUL) when the
; configuration constant use_mul is 0. R0 and R1 are not reloaded here; the
; reference operands are read from DATA_V and DATA_V2, so both registers are
; assumed to still hold those vectors -- TODO confirm for all configurations.
; A mismatch jumps to ERR_VALU.
TEST_VMUL:      OR A,0,use_mul          ; A = 0 sets the zero flag -> skip
1526
                JZ 0+NO_VMUL
1527
 
1528
                ;16 bit
1529
                VMUL.W R2,R0,R1
1530
                OR A,0,V_RES            ; A = address of the result buffer
1531
                VST 0+A,R2
1532
 
1533
                OR A,0,0                ; A = word index
1534
 VMUL_LOOP_W:   LD X,A+DATA_V
1535
                LD Y,A+DATA_V2
1536
                MUL X,X,Y               ; expected value from the scalar multiplier (operands are not masked here)
1537
                LD Y,A+V_RES
1538
                SUB 0,X,Y
1539
                JNZ 0+ERR_VALU
1540
                INC A,A
1541
                SUB 0,A,k               ; loop until k words have been checked
1542
                JNZ 0+VMUL_LOOP_W
1543
 
1544
                ;8 bit: per word two products are checked. The lowest byte of each
                ;16 bit half of both operands is multiplied with the scalar MUL and
                ;compared with the matching 16 bit half of the vector result.
1545
                VMUL.B R2,R0,R1
1546
                OR A,0,V_RES            ; A = address of the result buffer
1547
                VST 0+A,R2
1548
 
1549
                OR A,0,0                ; A = word index
1550
 VMUL_LOOP_B:   LD X,A+DATA_V   ;low word: product of bits 7..0 of both operands
1551
                LD Y,A+DATA_V2
1552
                AND X,X,$FF
1553
                AND Y,Y,$FF
1554
                MUL X,X,Y
1555
                LD Y,A+V_RES
1556
                AND Y,Y,$FFFF           ; compare with the lower 16 bits of the result
1557
                SUB 0,X,Y
1558
                JNZ 0+ERR_VALU
1559
 
1560
                LD X,A+DATA_V               ;high word: product of bits 23..16 of both operands
1561
                LD Y,A+DATA_V2
1562
 
1563
                ST 0+AKKU,A             ; park the word index, A becomes a shift counter
1564
                OR A,0,0
1565
 VMUL_LOOP_B2:  LSR X,X                 ; shift both operands down by 16 bits
1566
                LSR Y,Y
1567
                INC A,A
1568
                SUB 0,A,16
1569
                JNZ 0+VMUL_LOOP_B2
1570
                AND X,X,$FF
1571
                AND Y,Y,$FF
1572
                MUL X,X,Y
1573
 
1574
                LD A,0+AKKU             ; restore the word index to fetch the result word
1575
                LD Y,A+V_RES
1576
 
1577
                OR A,0,0
1578
 VMUL_LOOP_B3:  LSR Y,Y                 ; shift the result down by 16 bits
1579
                INC A,A
1580
                SUB 0,A,16
1581
                JNZ 0+VMUL_LOOP_B3
1582
 
1583
                SUB 0,X,Y
1584
                JNZ 0+ERR_VALU
1585
 
1586
                LD A,0+AKKU             ; restore the word index
1587
                INC A,A
1588
                SUB 0,A,k               ; loop until k words have been checked
1589
                JNZ 0+VMUL_LOOP_B
1590
 NO_VMUL:       NOP                     ; jump target when use_mul is 0
1591
 
1592
; TEST_VMOLR: tests the VMOL and VMOR instructions. The test is skipped
; (jump to NO_VMOLR) when the configuration constant use_vmolr is 0.
; R0 is loaded from DATA_V and R1 from DATA_V2. The result of VMOL R0 is
; compared word by word with DATA_V2, the result of VMOR R1 with DATA_V.
; A mismatch jumps to ERR_VMOLR.
; Fixes compared to the previous version:
;   * the test is gated by use_vmolr (it was wrongly gated by use_mul)
;   * both compare loops now increment the word index A; without the
;     increment "SUB 0,A,k / JNZ" never reached zero and the loop could
;     not terminate
; NOTE(review): the loops now compare all k words; confirm that the
; reference data also matches in the words at the vector boundaries.
TEST_VMOLR:     OR A,0,use_vmolr        ; A = 0 sets the zero flag -> skip
1593
                JZ 0+NO_VMOLR
1594
 
1595
                OR A,0,DATA_V
1596
                VLD R0,0+A              ; R0 = vector at DATA_V
1597
                OR A,0,DATA_V2
1598
                VLD R1,0+A              ; R1 = vector at DATA_V2
1599
 
1600
                VMOL R2,R0
1601
                OR A,0,V_RES            ; A = address of the result buffer
1602
                VST 0+A,R2
1603
 
1604
                OR A,0,0                ; A = word index
1605
 VMOL_LOOP:     LD X,A+V_RES
1606
                LD Y,A+DATA_V2
1607
                SUB 0,X,Y
1608
                JNZ 0+ERR_VMOLR
                INC A,A                 ; advance to the next word
1609
                SUB 0,A,k               ; loop until k words have been checked
1610
                JNZ 0+VMOL_LOOP
1611
 
1612
                VMOR R2,R1
1613
                OR A,0,V_RES            ; A = address of the result buffer
1614
                VST 0+A,R2
1615
 
1616
                OR A,0,0                ; A = word index
1617
 VMOR_LOOP:     LD X,A+V_RES
1618
                LD Y,A+DATA_V
1619
                SUB 0,X,Y
1620
                JNZ 0+ERR_VMOLR
                INC A,A                 ; advance to the next word
1621
                SUB 0,A,k               ; loop until k words have been checked
1622
                JNZ 0+VMOR_LOOP
1623
 
1624
 NO_VMOLR:      NOP                     ; jump target when use_vmolr is 0
1625
 
1626
; FINISH: reached only when no check has failed; the program stays here in an
; endless loop (jump to itself).
FINISH:         JMP 0+FINISH
1627
 
1628
; Error exits: one HALT per test category, so the address at which the
; processor halts tells which group of tests failed.
; ERR_SCALAR, ERR_COOP and ERR_VECTOR are not referenced in this part of the
; file (presumably used by earlier tests -- verify).
ERR_SCALAR:     HALT
1629
ERR_COOP:       HALT
1630
ERR_VECTOR:     HALT
1631
ERR_VALU:       HALT                    ; vector ALU result mismatch (shift, multiply)
1632
ERR_SIMUL:      HALT                    ; simultaneous vector/scalar execution mismatch
1633
ERR_VSHUF:      HALT                    ; shuffle result mismatch
1634
ERR_VMOLR:      HALT                    ; VMOL / VMOR result mismatch
1635
 
1636
; Scratch variables of the test program, all initialised with 0.
AKKU:           DC 0                    ; parks register A (the word index) while A is used otherwise
1637
CARRY:          DC 0                    ; carry bit saved between the two halves of a 64 bit shift check
1638
X_REG:          DC 0                    ; not used in this part of the file
1639
Y_REG:          DC 0                    ; not used in this part of the file
1640
I:              DC 0                    ; inner loop counter (byte position within a word)
1641
 
1642
A_REG:          DC 0                    ; remaining input bits during the byte-wise checks
1643
B_REG:          DC 0                    ; not used in this part of the file
1644
RES_REG:        DC 0                    ; remaining result bits during the byte-wise checks
1645
 

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.