OpenCores
URL https://opencores.org/ocsvn/sudoku/sudoku/trunk

Subversion Repositories sudoku

[/] [sudoku/] [branches/] [zynq/] [sw/] [main.cc] - Blame information for rev 7

Go to most recent revision | Details | Compare with Previous | View Log

Line No. Rev Author Line
1 6 dsheffie
#include <cstdio>
2
#include <cstdlib>
3
#include <cassert>
4
#include <cstring>
5
#include <time.h>
6
#include <limits.h>
7
#include <stdint.h>
8
#include <sys/time.h>
9
#include <unistd.h>
10
 
11
#include "driver.h"
12
#include "counters.h"
13
 
14
#define USE_DIV_TABLE 1
15
const int divTable[9] = {0,0,0,3,3,3,6,6,6};
16
 
17
double timestamp()
18
{
19
  struct timeval tv;
20
  gettimeofday (&tv, 0);
21
  return tv.tv_sec + 1e-6*tv.tv_usec;
22
}
23
 
24
int32_t sudoku(uint32_t *board, uint32_t *os);
25
 
26
int u32_cmp(const void* a, const void *b)
27
{
28
  uint32_t x = *((uint32_t*)a);
29
  uint32_t y = *((uint32_t*)b);
30
  return (x<y);
31
}
32
 
33
inline uint32_t ln2(uint32_t x)
34
{
35
  uint32_t y = 1;
36
  while(x > 1)
37
    {
38
      y++;
39
      x = x>>1;
40
    }
41
  return y;
42
}
43
 
44
inline uint32_t one_set(uint32_t x)
45
{
46
  /* all ones if pow2, otherwise 0 */
47
  uint32_t pow2 = (x & (x-1));
48
  uint32_t m = (pow2 == 0);
49
  return ((~m) + 1) & x;
50
}
51
 
52
inline uint32_t find_first_set(uint32_t x)
53
{
54
#ifdef __GNUC__
55
  return __builtin_ctz(x);
56
#else
57
  /* copied from the sparc v9 reference manual */
58
  return count_ones(x ^ (~(-x))) - 1;
59
#endif
60
}
61
 
62
inline uint32_t count_ones(uint32_t x)
63
{
64
#ifdef __GNUC__
65
  return __builtin_popcount(x);
66
#else
67
  uint32_t y = 0;
68
  for(y=0;x!=0;y++)
69
    {
70
      x &= (x-1);
71
    }
72
  return y;
73
#endif
74
}
75
 
76
inline uint32_t isPow2(uint32_t x)
77
{
78
  uint32_t pow2 = (x & (x-1));
79
  return (pow2 == 0);
80
}
81
 
82
int32_t check_correct(uint32_t *board, uint32_t *unsolved_pieces)
83
{
84
  int32_t i,j;
85
  int32_t ii,jj;
86
  int32_t si,sj;
87
  int32_t tmp;
88
 
89
  *unsolved_pieces = 0;
90
  int32_t violated = 0;
91
 
92
  uint32_t counts[81];
93
  for(i=0;i < 81; i++)
94
    {
95
      counts[i] = count_ones(board[i]);
96
      if(counts[i]!=1)
97
        {
98
          *unsolved_pieces = 1;
99
          return 0;
100
        }
101
    }
102
 
103
 
104
  /* for each row */
105
  for(i=0;i<9;i++)
106
    {
107
      uint32_t sums[9] = {0};
108
      for(j=0;j<9;j++)
109
        {
110
          if(counts[i*9 +j] == 1)
111
            {
112
              tmp =ln2(board[i*9+j])-1;
113
              sums[tmp]++;
114
              if(sums[tmp] > 1)
115
                {
116
                  //char buf[80];
117
                  //sprintf_binary(board[i*9+j],buf,80);
118
                  //printf("violated row %d, sums[%d]=%d, board = %s\n", i, tmp, sums[tmp], buf);
119
                  //print_board(board);
120
                  violated = 1;
121
                  goto done;
122
                }
123
            }
124
        }
125
    }
126
  /* for each column */
127
 
128
   for(j=0;j<9;j++)
129
   {
130
     uint32_t sums[9] = {0};
131
     for(i=0;i<9;i++)
132
     {
133
       if(counts[i*9 +j] == 1)
134
         {
135
           tmp =ln2(board[i*9+j])-1;
136
           sums[tmp]++;
137
           if(sums[tmp] > 1)
138
             {
139
               violated = 1;
140
               goto done;
141
               //printf("violated column %d, sums[%d]=%d\n", i, tmp, sums[tmp]);
142
               //return 0;
143
             }
144
         }
145
     }
146
   }
147
 
148
   for(i=0;i<9;i++)
149
     {
150
#ifdef USE_DIV_TABLE
151
       si = divTable[i];
152
#else
153
       si = 3*(i/3);
154
#endif
155
       for(j=0;j<9;j++)
156
         {
157
#ifdef USE_DIV_TABLE
158
           sj = 3*(j/3);
159
#else
160
           sj = divTable[j];
161
#endif
162
           uint32_t sums[9] = {0};
163
           for(ii=si;ii<(si+3);ii++)
164
             {
165
               for(jj=sj;jj<(sj+3);jj++)
166
                 {
167
                   if(counts[ii*9 +jj] == 1)
168
                     {
169
                       tmp =ln2(board[ii*9+jj])-1;
170
                       sums[tmp]++;
171
                       if(sums[tmp] > 1)
172
                         {
173
                           violated = 1;
174
                           goto done;
175
                         }
176
                     }
177
                 }
178
             }
179
         }
180
     }
181
 
182
done:
183
   return (violated == 0);
184
}
185
 
186
 
187
 
188
void print_board(uint32_t *board)
189
{
190
  int32_t i,j;
191
  char buf[80] = {0};
192
  for(i=0;i<9;i++)
193
    {
194
      for(j=0;j<9;j++)
195
        {
196
          /* sprintf_binary(board[i*9+j], buf, 80);
197
           * printf("%s ", buf); */
198
          printf("%d ", ln2(board[i*9+j]));
199
        }
200
      printf("\n");
201
    }
202
  printf("\n");
203
}
204
 
205
 
206
int32_t hw_solve(Driver *d, uint32_t *board)
207
{
208
  uint32_t rc=0;
209
  d->reset();
210
  const uint32_t w_bit = 1 << 31;
211
 
212
  /*
213
  if(init == 0)
214
    {
215
      memset(&cCnt,0,sizeof(cCnt));
216
      initTicks(cCnt);
217
      init = 1;
218
    }
219
  */
220
  //uint64_t c0 = getTicks(cCnt);
221
  for(int i = 0; i < 81; i++)
222
    {
223
      uint32_t addr = i;
224
      uint32_t data = board[i];
225
      uint32_t v = w_bit | (data << 7) | addr;
226
      d->write(1, v);
227
    }
228
  //c0 = getTicks(cCnt) - c0;
229
  //printf("%llu cycles to write data to FPGA\n", c0);
230
 
231
  /* only pulsed for one cycle */
232
  d->write(7,1);
233
 
234
  do
235
    {
236
      rc = d->read(0) & 0x7;
237
    }
238
  while(rc ==0);
239
 
240
  //c0 = getTicks(cCnt);
241
  for(int i = 0; i < 81; i++)
242
    {
243
      uint32_t addr = i;
244
      d->write(1, addr);
245
      uint32_t v = d->read(2);
246
      board[i] = v;
247
    }
248
  //c0 = getTicks(cCnt) - c0;
249
  //printf("%llu cycles to read data from FPGA\n", c0);
250
 
251
  return (rc == 0x1);
252
}
253
 
254
int main(int argc, char **argv)
255
{
256
  uint32_t *cpuBoard, *hwBoard;
257
  uint32_t *os;
258
  uint64_t c0,c1,i0;
259
  int i,j,rc;
260
  uint32_t **puzzles;
261
  uint32_t **cpyPuzzles;
262
  uint32_t nPuzzles = 0;
263
  uint32_t *speedUps = 0;
264
  int c;
265
 
266
  hwCounter_t cCnt,iCnt;
267
  memset(&cCnt,0,sizeof(cCnt));
268
  memset(&iCnt,0,sizeof(iCnt));
269
  initTicks(cCnt);
270
  initInsns(iCnt);
271
 
272
  Driver *d = new Driver(0x79100000);
273
 
274
  FILE *fp = 0;
275
 
276
  if(argc < 2)
277
    return -1;
278
 
279
 
280
  fp = fopen(argv[1], "r");
281
  assert(fp);
282
 
283
 
284
 
285
  while((c = fgetc(fp)) != EOF)
286
    {
287
      if(c == '\n')
288
        {
289
          nPuzzles++;
290
        }
291
    }
292
  rewind(fp);
293
  puzzles = (uint32_t**)malloc(sizeof(uint32_t*)*nPuzzles);
294
  cpyPuzzles = (uint32_t**)malloc(sizeof(uint32_t*)*nPuzzles);
295
  speedUps = (uint32_t*)malloc(sizeof(uint32_t)*nPuzzles);
296
  memset(speedUps,0,sizeof(uint32_t)*nPuzzles);
297
 
298
  for(i=0;i<nPuzzles;i++)
299
    {
300
      puzzles[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
301
      cpyPuzzles[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
302
    }
303
 
304
  j = i = 0;
305
  while((c  = fgetc(fp)) != EOF)
306
    {
307
      if(c == '\n')
308
        {
309
          j = 0;
310
          i++;
311
        }
312
      else if(c == ' ')
313
        {
314
          continue;
315
        }
316
      else if(c == '.')
317
        {
318
          puzzles[i][j++] = 0x1ff;
319
        }
320
      else
321
        {
322
          if(c >= 65 && c <= 70)
323
            {
324
              c -= 55;
325
            }
326
          else
327
            {
328
              c -= 48;
329
            }
330
          puzzles[i][j++] = 1 << (c-1);
331
        }
332
    }
333
  fclose(fp);
334
  for(i=0;i<nPuzzles;i++)
335
    {
336
      memcpy(cpyPuzzles[i], puzzles[i], sizeof(uint32_t)*81);
337
    }
338
 
339
  printf("nPuzzles=%d\n", nPuzzles);
340
  cpuBoard = (uint32_t*)malloc(sizeof(uint32_t)*81);
341
  hwBoard = (uint32_t*)malloc(sizeof(uint32_t)*81);
342
  os = (uint32_t*)malloc(sizeof(uint32_t)*81);
343
  uint32_t mismatches = 0;
344
 
345
  double t0,t1;
346
  for(i=0;i<nPuzzles;i++)
347
    {
348
      c0 = c1 = 1;
349
      t0 = t1 = 0.0;
350
 
351
      memcpy(cpuBoard,puzzles[i],sizeof(uint32_t)*81);
352
      memcpy(hwBoard,puzzles[i],sizeof(uint32_t)*81);
353
 
354
      c0 = getTicks(cCnt);
355
      i0 = getInsns(iCnt);
356
      c = sudoku(cpuBoard, os);
357
      i0 = getInsns(iCnt) - i0;
358
      c0 = getTicks(cCnt) - c0;
359
 
360
      double ipc = ((double)i0) / ((double)c0);
361
 
362
      c1 = getTicks(cCnt);
363
      rc = hw_solve(d,hwBoard);
364
      c1 = getTicks(cCnt) - c1;
365
 
366
      speedUps[i] = c0/c1;
367
      printf("SpeedUp = %llu (%d) : SW ticks = %llu (IPC = %g), HW ticks = %llu\n",
368
             c0/c1, c, c0, ipc, c1);
369
 
370
      if(rc != 1)
371
        {
372
          printf("error with puzzle %d\n", i);
373
          //print_board(puzzles[i]);
374
        }
375
 
376
 
377
      for(j=0;j<81;j++)
378
        {
379
          if(cpuBoard[j]!=hwBoard[j])
380
            {
381
              mismatches++;
382
              break;
383
            }
384
        }
385
 
386
      //print_board(puzzles[i]);
387
    }
388
 
389
 done:
390
  printf("%u mismatches between CPU and FPGA\n", mismatches);
391
  qsort(speedUps, nPuzzles, sizeof(uint32_t), u32_cmp);
392
  c = rand()%nPuzzles;
393
 
394
  for(i=0;i<nPuzzles;i++)
395
    {
396
      memcpy(puzzles[i], cpyPuzzles[i], sizeof(uint32_t)*81);
397
    }
398
 
399
  t1 = timestamp();
400
  for(i=0;i<nPuzzles;i++)
401
    {
402
      sudoku(puzzles[i],os);
403
    }
404
  t1 = timestamp() - t1;
405
  printf("%d\n", puzzles[c][0]);
406
 
407
  for(i=0;i<nPuzzles;i++)
408
    {
409
      memcpy(puzzles[i], cpyPuzzles[i], sizeof(uint32_t)*81);
410
    }
411
 
412
  t0 = timestamp();
413
  for(i=0;i<nPuzzles;i++)
414
   {
415
     hw_solve(d,puzzles[i]);
416
   }
417
  t0 = timestamp() - t0;
418
 
419
  printf("%d\n", puzzles[c][0]);
420
 
421
  printf("HW=%g,SW=%g\n", t0, t1);
422
  if(nPuzzles == 1)
423
    {
424
      print_board(puzzles[0]);
425
    }
426
 
427
  if(nPuzzles > 0)
428
    {
429
      printf("Speed-Ups: Median = %u, Max=%u, Min=%u\n",
430
             speedUps[nPuzzles/2],
431
             speedUps[0],
432
             speedUps[nPuzzles-1]
433
             );
434
    }
435
  for(i=0;i<nPuzzles;i++)
436
    {
437
      free(cpyPuzzles[i]);
438
      free(puzzles[i]);
439
    }
440
  free(cpyPuzzles);
441
  free(puzzles);
442
 
443
  delete d;
444
  free(os);
445
  free(cpuBoard);
446
  free(hwBoard);
447
  free(speedUps);
448
  return 0;
449
}
450
 
451
int32_t solve(uint32_t *board, uint32_t *os)
452
{
453
  int32_t i,j,idx;
454
  int32_t ii,jj;
455
  int32_t ib,jb;
456
  uint32_t set_row, set_col, set_sqr;
457
  uint32_t row_or, col_or, sqr_or;
458
  uint32_t tmp;
459
  int32_t changed = 0;
460
 
461
  do
462
    {
463
      changed=0;
464
      //print_board(board);
465
      /* compute all positions one's set value */
466
      for(i = 0; i < 9; i++)
467
        {
468
          for(j = 0; j < 9; j++)
469
            {
470
              idx = i*9 + j;
471
              os[idx] = one_set(board[idx]);
472
            }
473
        }
474
 
475
      for(i = 0; i < 9; i++)
476
        {
477
          for(j = 0; j < 9; j++)
478
            {
479
              /* already solved */
480
              if(isPow2(board[i*9+j]))
481
                continue;
482
              else if(board[idx]==0)
483
                return 0;
484
 
485
              row_or = set_row = 0;
486
              for(jj = 0; jj < 9; jj++)
487
                {
488
                  idx = i*9 + jj;
489
                  if(jj == j)
490
                    continue;
491
                  set_row |= os[idx];
492
                  row_or |= board[idx];
493
                }
494
              col_or = set_col = 0;
495
              for(ii = 0; ii < 9; ii++)
496
                {
497
                  idx = ii*9 + j;
498
                  if(ii == i)
499
                    continue;
500
                  set_col |= os[idx];
501
                  col_or |= board[idx];
502
                }
503
              sqr_or = set_sqr = 0;
504
#ifdef USE_DIV_TABLE
505
              ib = divTable[i];
506
              jb = divTable[j];
507
#else
508
              ib = 3*(i/3);
509
              jb = 3*(j/3);
510
#endif
511
              for(ii=ib;ii < ib+3;ii++)
512
                {
513
                  for(jj=jb;jj<jb+3;jj++)
514
                    {
515
                      idx = ii*9 + jj;
516
                      if((i==ii) && (j == jj))
517
                        continue;
518
                      set_sqr |= os[idx];
519
                      sqr_or |= board[idx];
520
                    }
521
                }
522
              tmp = board[i*9 + j] & ~( set_row | set_col | set_sqr);
523
 
524
              if(tmp != board[i*9 + j])
525
                {
526
                  changed = 1;
527
                }
528
              board[i*9+j] = tmp;
529
 
530
              /* check for singletons */
531
              tmp = 0;
532
              tmp = one_set(board[i*9 + j] & (~row_or));
533
              tmp |= one_set(board[i*9 + j] & (~col_or));
534
              tmp |= one_set(board[i*9 + j] & (~sqr_or));
535
              if(tmp != 0 && (board[i*9+j] != tmp))
536
                {
537
                  board[i*9+j] = tmp;
538
                  changed = 1;
539
                }
540
            }
541
        }
542
 
543
    } while(changed);
544
 
545
  return 0;
546
}
547
 
548
inline void find_min(uint32_t *board, int32_t *min_idx, int *min_pos)
549
{
550
  int32_t tmp,idx,i,j;
551
  int32_t tmin_idx,tmin_pos;
552
 
553
  tmin_idx = 0;
554
  tmin_pos = INT_MAX;
555
  for(idx=0;idx<81;idx++)
556
    {
557
      tmp = count_ones(board[idx]);
558
      tmp = (tmp == 1) ? INT_MAX : tmp;
559
      if(tmp < tmin_pos)
560
        {
561
          tmin_pos = tmp;
562
          tmin_idx = idx;
563
        }
564
    }
565
  *min_idx = tmin_idx;
566
  *min_pos = tmin_pos;
567
}
568
 
569
int32_t sudoku(uint32_t *board, uint32_t *os)
570
{
571
  int32_t rc;
572
  int32_t itrs=0;
573
  int32_t tmp,min_pos;
574
  int32_t min_idx;
575
  int32_t i,j,idx;
576
 
577
  uint32_t cell;
578
  uint32_t old[81];
579
 
580
  uint32_t unsolved_pieces = 0;
581
  uint32_t *bptr, *nbptr;
582
 
583
  int32_t stack_pos = 0;
584
  int32_t stack_size = (1<<6);
585
  uint32_t **stack = 0;
586
 
587
  stack = (uint32_t**)malloc(sizeof(uint32_t*)*stack_size);
588
  for(i=0;i<stack_size;i++)
589
    {
590
      stack[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
591
    }
592
 
593
  memcpy(stack[stack_pos++], board, sizeof(uint32_t)*81);
594
 
595
  //printf("%d poss\n", count_poss(board));
596
  while(stack_pos > 0)
597
    {
598
      itrs++;
599
      unsolved_pieces = 0;
600
      bptr = stack[--stack_pos];
601
 
602
      bzero(os,sizeof(uint32_t)*81);
603
      solve(bptr,os);
604
      rc = check_correct(bptr, &unsolved_pieces);
605
      /* solved puzzle */
606
      if(rc == 1 && unsolved_pieces == 0)
607
        {
608
          memcpy(board, bptr, sizeof(uint32_t)*81);
609
          goto solved_puzzle;
610
        }
611
      /* traversed to bottom of search tree and
612
       * didn't find a valid solution */
613
      if(rc == 0 && unsolved_pieces == 0)
614
        {
615
          continue;
616
        }
617
 
618
      find_min(bptr, &min_idx, &min_pos);
619
      cell = bptr[min_idx];
620
      while(cell != 0)
621
        {
622
          tmp = find_first_set(cell);
623
          cell &= ~(1 << tmp);
624
          nbptr = stack[stack_pos];
625
          stack_pos++;
626
          memcpy(nbptr, bptr, sizeof(uint32_t)*81);
627
          nbptr[min_idx] = 1<<tmp;
628
 
629
          assert(stack_pos < stack_size);
630
        }
631
    }
632
 solved_puzzle:
633
 
634
  for(i=0;i<stack_size;i++)
635
    {
636
      free(stack[i]);
637
    }
638
  free(stack);
639
 
640
  return itrs;
641
}

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.