OpenCores
URL https://opencores.org/ocsvn/sudoku/sudoku/trunk

Subversion Repositories sudoku

[/] [sudoku/] [branches/] [zynq/] [sw/] [main.cc] - Rev 6

Compare with Previous | Blame | View Log

#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <time.h>
#include <limits.h>
#include <stdint.h>
#include <sys/time.h>
#include <unistd.h>
 
#include "driver.h"
#include "counters.h"
 
#define USE_DIV_TABLE 1
const int divTable[9] = {0,0,0,3,3,3,6,6,6};
 
double timestamp()
{
  struct timeval tv;
  gettimeofday (&tv, 0);
  return tv.tv_sec + 1e-6*tv.tv_usec;
}
 
int32_t sudoku(uint32_t *board, uint32_t *os);
 
int u32_cmp(const void* a, const void *b)
{
  uint32_t x = *((uint32_t*)a);
  uint32_t y = *((uint32_t*)b);
  return (x<y);
}
 
inline uint32_t ln2(uint32_t x)
{
  uint32_t y = 1;
  while(x > 1)
    {
      y++;
      x = x>>1;
    }
  return y;
}
 
inline uint32_t one_set(uint32_t x)
{
  /* all ones if pow2, otherwise 0 */
  uint32_t pow2 = (x & (x-1));
  uint32_t m = (pow2 == 0);
  return ((~m) + 1) & x;
}
 
inline uint32_t find_first_set(uint32_t x)
{
#ifdef __GNUC__
  return __builtin_ctz(x);
#else
  /* copied from the sparc v9 reference manual */
  return count_ones(x ^ (~(-x))) - 1;
#endif
}
 
inline uint32_t count_ones(uint32_t x)
{
#ifdef __GNUC__
  return __builtin_popcount(x);
#else
  uint32_t y = 0;
  for(y=0;x!=0;y++)
    {
      x &= (x-1);
    }
  return y;
#endif
}
 
inline uint32_t isPow2(uint32_t x)
{
  uint32_t pow2 = (x & (x-1));
  return (pow2 == 0);
}
 
int32_t check_correct(uint32_t *board, uint32_t *unsolved_pieces)
{
  int32_t i,j;
  int32_t ii,jj;
  int32_t si,sj;
  int32_t tmp;
 
  *unsolved_pieces = 0;
  int32_t violated = 0;
 
  uint32_t counts[81];
  for(i=0;i < 81; i++)
    {
      counts[i] = count_ones(board[i]);
      if(counts[i]!=1)
	{
	  *unsolved_pieces = 1;
	  return 0;
	}
    }
 
 
  /* for each row */
  for(i=0;i<9;i++)
    {
      uint32_t sums[9] = {0};
      for(j=0;j<9;j++)
	{
	  if(counts[i*9 +j] == 1)
	    {
	      tmp =ln2(board[i*9+j])-1;
	      sums[tmp]++;
	      if(sums[tmp] > 1)
		{
		  //char buf[80];
		  //sprintf_binary(board[i*9+j],buf,80);
		  //printf("violated row %d, sums[%d]=%d, board = %s\n", i, tmp, sums[tmp], buf);
		  //print_board(board);
		  violated = 1;
		  goto done;
		}
	    }
	}
    }
  /* for each column */
 
   for(j=0;j<9;j++)
   {
     uint32_t sums[9] = {0};
     for(i=0;i<9;i++)
     {
       if(counts[i*9 +j] == 1)
	 {
	   tmp =ln2(board[i*9+j])-1;
	   sums[tmp]++;
	   if(sums[tmp] > 1)
	     {
	       violated = 1;
	       goto done;
	       //printf("violated column %d, sums[%d]=%d\n", i, tmp, sums[tmp]);
	       //return 0;
	     }
	 }
     }
   }
 
   for(i=0;i<9;i++)
     {
#ifdef USE_DIV_TABLE
       si = divTable[i];
#else
       si = 3*(i/3);
#endif
       for(j=0;j<9;j++)
	 {
#ifdef USE_DIV_TABLE
	   sj = 3*(j/3);
#else
	   sj = divTable[j];
#endif
	   uint32_t sums[9] = {0};
	   for(ii=si;ii<(si+3);ii++)
	     {
	       for(jj=sj;jj<(sj+3);jj++)
		 {
		   if(counts[ii*9 +jj] == 1)
		     {
		       tmp =ln2(board[ii*9+jj])-1;
		       sums[tmp]++;
		       if(sums[tmp] > 1)
			 {
			   violated = 1;
			   goto done;
			 }
		     }
		 }
	     }
	 }
     }
 
done:
   return (violated == 0);
}
 
 
 
void print_board(uint32_t *board)
{
  int32_t i,j;
  char buf[80] = {0};
  for(i=0;i<9;i++)
    {
      for(j=0;j<9;j++)
	{
	  /* sprintf_binary(board[i*9+j], buf, 80);
	   * printf("%s ", buf); */
	  printf("%d ", ln2(board[i*9+j]));
	}
      printf("\n");
    }
  printf("\n");
}
 
 
int32_t hw_solve(Driver *d, uint32_t *board)
{
  uint32_t rc=0;
  d->reset();
  const uint32_t w_bit = 1 << 31;
 
  /*
  if(init == 0)
    {
      memset(&cCnt,0,sizeof(cCnt));
      initTicks(cCnt);
      init = 1;
    }
  */
  //uint64_t c0 = getTicks(cCnt);
  for(int i = 0; i < 81; i++)
    {
      uint32_t addr = i;
      uint32_t data = board[i];
      uint32_t v = w_bit | (data << 7) | addr;
      d->write(1, v);
    }
  //c0 = getTicks(cCnt) - c0;
  //printf("%llu cycles to write data to FPGA\n", c0);
 
  /* only pulsed for one cycle */
  d->write(7,1);
 
  do
    {
      rc = d->read(0) & 0x7;
    } 
  while(rc ==0);
 
  //c0 = getTicks(cCnt);
  for(int i = 0; i < 81; i++)
    {
      uint32_t addr = i;
      d->write(1, addr);
      uint32_t v = d->read(2);
      board[i] = v;
    }
  //c0 = getTicks(cCnt) - c0;
  //printf("%llu cycles to read data from FPGA\n", c0);
 
  return (rc == 0x1);
}
 
int main(int argc, char **argv)
{
  uint32_t *cpuBoard, *hwBoard;
  uint32_t *os; 
  uint64_t c0,c1,i0;
  int i,j,rc;
  uint32_t **puzzles;
  uint32_t **cpyPuzzles;
  uint32_t nPuzzles = 0;
  uint32_t *speedUps = 0;
  int c;  
 
  hwCounter_t cCnt,iCnt;
  memset(&cCnt,0,sizeof(cCnt));
  memset(&iCnt,0,sizeof(iCnt));
  initTicks(cCnt);
  initInsns(iCnt);
 
  Driver *d = new Driver(0x79100000);
 
  FILE *fp = 0;
 
  if(argc < 2)
    return -1;
 
 
  fp = fopen(argv[1], "r");
  assert(fp);
 
 
 
  while((c = fgetc(fp)) != EOF)
    {
      if(c == '\n')
        {
          nPuzzles++;
        }
    }
  rewind(fp);
  puzzles = (uint32_t**)malloc(sizeof(uint32_t*)*nPuzzles);
  cpyPuzzles = (uint32_t**)malloc(sizeof(uint32_t*)*nPuzzles);
  speedUps = (uint32_t*)malloc(sizeof(uint32_t)*nPuzzles);
  memset(speedUps,0,sizeof(uint32_t)*nPuzzles);
 
  for(i=0;i<nPuzzles;i++)
    {
      puzzles[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
      cpyPuzzles[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
    }
 
  j = i = 0;
  while((c  = fgetc(fp)) != EOF)
    {
      if(c == '\n')
        {
          j = 0;
          i++;
        }
      else if(c == ' ')
        {
          continue;
        }
      else if(c == '.')
        {
          puzzles[i][j++] = 0x1ff;
        }
      else
        {
          if(c >= 65 && c <= 70)
            {
              c -= 55;
            }
          else
            {
              c -= 48;
            }
          puzzles[i][j++] = 1 << (c-1);
        }
    }
  fclose(fp);
  for(i=0;i<nPuzzles;i++)
    {
      memcpy(cpyPuzzles[i], puzzles[i], sizeof(uint32_t)*81);
    }
 
  printf("nPuzzles=%d\n", nPuzzles);
  cpuBoard = (uint32_t*)malloc(sizeof(uint32_t)*81);
  hwBoard = (uint32_t*)malloc(sizeof(uint32_t)*81);
  os = (uint32_t*)malloc(sizeof(uint32_t)*81);
  uint32_t mismatches = 0;
 
  double t0,t1;
  for(i=0;i<nPuzzles;i++)
    {
      c0 = c1 = 1;
      t0 = t1 = 0.0;
 
      memcpy(cpuBoard,puzzles[i],sizeof(uint32_t)*81);
      memcpy(hwBoard,puzzles[i],sizeof(uint32_t)*81);
 
      c0 = getTicks(cCnt);
      i0 = getInsns(iCnt); 
      c = sudoku(cpuBoard, os);
      i0 = getInsns(iCnt) - i0;
      c0 = getTicks(cCnt) - c0;
 
      double ipc = ((double)i0) / ((double)c0);
 
      c1 = getTicks(cCnt);
      rc = hw_solve(d,hwBoard);
      c1 = getTicks(cCnt) - c1;
 
      speedUps[i] = c0/c1;
      printf("SpeedUp = %llu (%d) : SW ticks = %llu (IPC = %g), HW ticks = %llu\n", 
	     c0/c1, c, c0, ipc, c1);
 
      if(rc != 1) 
	{
	  printf("error with puzzle %d\n", i);
	  //print_board(puzzles[i]);
	}
 
 
      for(j=0;j<81;j++)
	{
	  if(cpuBoard[j]!=hwBoard[j])
	    {
	      mismatches++;
	      break;
	    }
	}
 
      //print_board(puzzles[i]);
    }
 
 done:
  printf("%u mismatches between CPU and FPGA\n", mismatches);
  qsort(speedUps, nPuzzles, sizeof(uint32_t), u32_cmp);
  c = rand()%nPuzzles;
 
  for(i=0;i<nPuzzles;i++)
    {
      memcpy(puzzles[i], cpyPuzzles[i], sizeof(uint32_t)*81);
    }
 
  t1 = timestamp();
  for(i=0;i<nPuzzles;i++)
    {
      sudoku(puzzles[i],os);
    }
  t1 = timestamp() - t1;
  printf("%d\n", puzzles[c][0]);
 
  for(i=0;i<nPuzzles;i++)
    {
      memcpy(puzzles[i], cpyPuzzles[i], sizeof(uint32_t)*81);
    }
 
  t0 = timestamp();
  for(i=0;i<nPuzzles;i++)
   {
     hw_solve(d,puzzles[i]);
   }
  t0 = timestamp() - t0;
 
  printf("%d\n", puzzles[c][0]);
 
  printf("HW=%g,SW=%g\n", t0, t1);
  if(nPuzzles == 1)
    {
      print_board(puzzles[0]);
    }
 
  if(nPuzzles > 0)
    {
      printf("Speed-Ups: Median = %u, Max=%u, Min=%u\n", 
	     speedUps[nPuzzles/2], 
	     speedUps[0], 
	     speedUps[nPuzzles-1]
	     );
    }
  for(i=0;i<nPuzzles;i++)
    {
      free(cpyPuzzles[i]);
      free(puzzles[i]);
    }
  free(cpyPuzzles);
  free(puzzles);
 
  delete d;
  free(os); 
  free(cpuBoard);
  free(hwBoard);
  free(speedUps);
  return 0;
}
 
int32_t solve(uint32_t *board, uint32_t *os)
{
  int32_t i,j,idx;
  int32_t ii,jj;
  int32_t ib,jb;
  uint32_t set_row, set_col, set_sqr;
  uint32_t row_or, col_or, sqr_or;
  uint32_t tmp;
  int32_t changed = 0;
 
  do
    {
      changed=0;
      //print_board(board);
      /* compute all positions one's set value */
      for(i = 0; i < 9; i++)
	{
	  for(j = 0; j < 9; j++)
	    {
	      idx = i*9 + j;
	      os[idx] = one_set(board[idx]);
	    }
	}
 
      for(i = 0; i < 9; i++)
	{
	  for(j = 0; j < 9; j++)
	    {
	      /* already solved */
	      if(isPow2(board[i*9+j]))
		continue;
	      else if(board[idx]==0)
		return 0;
 
	      row_or = set_row = 0;
	      for(jj = 0; jj < 9; jj++)
		{
		  idx = i*9 + jj;
		  if(jj == j)
		    continue;
		  set_row |= os[idx];
		  row_or |= board[idx];
		}
	      col_or = set_col = 0;
	      for(ii = 0; ii < 9; ii++)
		{
		  idx = ii*9 + j;
		  if(ii == i)
		    continue;
		  set_col |= os[idx];
		  col_or |= board[idx];
		}
	      sqr_or = set_sqr = 0;
#ifdef USE_DIV_TABLE
	      ib = divTable[i];
	      jb = divTable[j];
#else
	      ib = 3*(i/3);
	      jb = 3*(j/3);
#endif
	      for(ii=ib;ii < ib+3;ii++)
		{
		  for(jj=jb;jj<jb+3;jj++)
		    {
		      idx = ii*9 + jj;
		      if((i==ii) && (j == jj))
			continue;
		      set_sqr |= os[idx];
		      sqr_or |= board[idx];
		    }
		}
	      tmp = board[i*9 + j] & ~( set_row | set_col | set_sqr);
 
	      if(tmp != board[i*9 + j])
		{
		  changed = 1;
		}
	      board[i*9+j] = tmp;
 
	      /* check for singletons */
	      tmp = 0;
	      tmp = one_set(board[i*9 + j] & (~row_or));
	      tmp |= one_set(board[i*9 + j] & (~col_or));
	      tmp |= one_set(board[i*9 + j] & (~sqr_or));
	      if(tmp != 0 && (board[i*9+j] != tmp))
		{
		  board[i*9+j] = tmp;
		  changed = 1;
		}
	    }
	}
 
    } while(changed);
 
  return 0;
}
 
inline void find_min(uint32_t *board, int32_t *min_idx, int *min_pos)
{
  int32_t tmp,idx,i,j;
  int32_t tmin_idx,tmin_pos;
 
  tmin_idx = 0;
  tmin_pos = INT_MAX;
  for(idx=0;idx<81;idx++)
    {
      tmp = count_ones(board[idx]);
      tmp = (tmp == 1) ? INT_MAX : tmp;
      if(tmp < tmin_pos)
	{
	  tmin_pos = tmp;
	  tmin_idx = idx;
	}
    }
  *min_idx = tmin_idx;
  *min_pos = tmin_pos;
}
 
int32_t sudoku(uint32_t *board, uint32_t *os)
{
  int32_t rc;
  int32_t itrs=0;
  int32_t tmp,min_pos;
  int32_t min_idx;
  int32_t i,j,idx;
 
  uint32_t cell;
  uint32_t old[81];
 
  uint32_t unsolved_pieces = 0;
  uint32_t *bptr, *nbptr;
 
  int32_t stack_pos = 0;
  int32_t stack_size = (1<<6);
  uint32_t **stack = 0;
 
  stack = (uint32_t**)malloc(sizeof(uint32_t*)*stack_size);
  for(i=0;i<stack_size;i++)
    {
      stack[i] = (uint32_t*)malloc(sizeof(uint32_t)*81);
    }
 
  memcpy(stack[stack_pos++], board, sizeof(uint32_t)*81);
 
  //printf("%d poss\n", count_poss(board));
  while(stack_pos > 0)
    {
      itrs++;
      unsolved_pieces = 0;
      bptr = stack[--stack_pos];
 
      bzero(os,sizeof(uint32_t)*81);
      solve(bptr,os);
      rc = check_correct(bptr, &unsolved_pieces);
      /* solved puzzle */
      if(rc == 1 && unsolved_pieces == 0)
	{
	  memcpy(board, bptr, sizeof(uint32_t)*81);
	  goto solved_puzzle;
	}
      /* traversed to bottom of search tree and
       * didn't find a valid solution */
      if(rc == 0 && unsolved_pieces == 0)
	{
	  continue;
	}
 
      find_min(bptr, &min_idx, &min_pos);
      cell = bptr[min_idx];  
      while(cell != 0)
	{
	  tmp = find_first_set(cell);
	  cell &= ~(1 << tmp);
	  nbptr = stack[stack_pos];
	  stack_pos++;
	  memcpy(nbptr, bptr, sizeof(uint32_t)*81);
	  nbptr[min_idx] = 1<<tmp;
 
	  assert(stack_pos < stack_size);
	}
    }
 solved_puzzle:
 
  for(i=0;i<stack_size;i++)
    {
      free(stack[i]);
    } 
  free(stack);
 
  return itrs;
}
 

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.