/* dcache_model.c -- data cache simulation
   Copyright (C) 1999 Damjan Lampret, lampret@opencores.org
   
This file is part of OpenRISC 1000 Architectural Simulator. 

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */

/* Cache functions.
   At the moment these functions only simulate the functionality of data
   caches and do not influence the fetch/decode/execute stages and timings.
   They are here only to verify the performance of various cache
   configurations.
 */

#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <stdarg.h>

#include "config.h"

#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif

#include "port.h"
#include "arch.h"
#include "dcache_model.h"
#include "abstract.h"
#include "except.h"
#include "opcode/or32.h"
#include "spr_defs.h"
#include "execute.h"
#include "stats.h"
#include "sprs.h"
#include "sim-config.h"
#include "misc.h"

/* Data cache */

/* One cache set: an array of ways, each holding one cached block.
   The global 'dc' array holds all sets and is indexed by set number. */
struct dc_set {
  struct {
    uint32_t line[MAX_DC_BLOCK_SIZE];  /* cached data, indexed by word offset
                                          within the block */
    oraddr_t tagaddr;  /* tag address; the invalidate paths store -1 here */
    int lru;    /* usage counter: set to config.dc.ustates - 1 when the way
                   is accessed or refilled, counted down otherwise; the
                   lowest value marks the replacement candidate */
  } way[MAX_DC_WAYS];
} dc[MAX_DC_SETS];

/* Print a short summary of the data cache configuration (total size, ways,
   sets and block size), or a notice when UPR[DCP] is not set. */
void dc_info(void)
{
  if (cpu_state.sprs[SPR_UPR] & SPR_UPR_DCP) {
    PRINTF("Data cache %dKB: ", config.dc.nsets * config.dc.blocksize * config.dc.nways / 1024);
    PRINTF("%d ways, %d sets, block size %d bytes\n", config.dc.nways, config.dc.nsets, config.dc.blocksize);
  } else {
    PRINTF("DCache not implemented. Set UPR[DCP].\n");
  }
}

/* Simulate a load of 'width' bytes (1, 2 or 4) from 'dataaddr'.

   When the cache is not present (UPR[DCP] clear), not enabled (SR[DCE]
   clear) or 'data_ci' is set, the value is read straight from memory with
   evalsim_mem32/16/8 (which also receive 'virt_addr'), optionally logged to
   cur_area->log, and returned.

   Otherwise the set and tag are derived from 'dataaddr' and all ways of the
   set are searched:
    - on a hit: increment the DC read hit stats, decrement the 'lru' of every
      way whose 'lru' is above the hit way's, set the hit way's 'lru' to
      config.dc.ustates - 1 and charge config.dc.load_hitdelay cycles;
    - on a miss: increment the DC read miss stats, pick the way with the
      lowest 'lru' below config.dc.ustates - 1 (way 0 if there is none),
      refill the whole line word by word starting with the word that
      contains 'dataaddr' and wrapping around inside the line, store the new
      tag, decrement every non-zero 'lru', set the refilled way's 'lru' to
      config.dc.ustates - 1 and charge config.dc.load_missdelay cycles.
      If 'cur_area' is NULL after a refill access, the way is invalidated
      (tag -1, 'lru' 0) and 0 is returned.

   The result is extracted from the cached 32-bit word: halfwords and bytes
   at lower addresses live in the more significant bits.  A 'width' other
   than 1, 2 or 4 yields 0.
*/

uint32_t dc_simulate_read(oraddr_t dataaddr, oraddr_t virt_addr, int width)
{
  int set;
  int hit = -1;      /* way that holds the line, -1 while unknown / on a miss */
  int victim = 0;    /* way selected for replacement on a miss */
  int lowest;
  int idx;
  oraddr_t tagaddr;
  uint32_t word = 0;

  /* Cache bypassed: plain memory access. */
  if (!(cpu_state.sprs[SPR_UPR] & SPR_UPR_DCP) || 
      !(cpu_state.sprs[SPR_SR] & SPR_SR_DCE)   ||
      data_ci) {
    switch (width) {
    case 4:
      word = evalsim_mem32(dataaddr, virt_addr);
      break;
    case 2:
      word = evalsim_mem16(dataaddr, virt_addr);
      break;
    case 1:
      word = evalsim_mem8(dataaddr, virt_addr);
      break;
    default:
      break;
    }

    if (cur_area && cur_area->log)
      fprintf (cur_area->log, "[%"PRIxADDR"] -> read %08"PRIx32"\n", dataaddr,
               word);

    return word;
  }

  /* Split the address into set index and tag. */
  set = (dataaddr / config.dc.blocksize) % config.dc.nsets;
  tagaddr = (dataaddr / config.dc.blocksize) / config.dc.nsets;

  /* Search every way; when several match, the highest-numbered one wins. */
  for (idx = 0; idx < config.dc.nways; idx++)
    if (dc[set].way[idx].tagaddr == tagaddr)
      hit = idx;

  if (hit < 0) {
    /* Miss: choose the replacement way. */
    dc_stats.readmiss++;

    lowest = config.dc.ustates - 1;
    for (idx = 0; idx < config.dc.nways; idx++) {
      if (dc[set].way[idx].lru < lowest) {
        victim = idx;
        lowest = dc[set].way[idx].lru;
      }
    }

    /* Refill the line one word at a time. */
    for (idx = 0; idx < (config.dc.blocksize); idx += 4) {
      /* FIXME: What is the virtual address meant to be? (ie. What happens if
       * we read out of memory while refilling a cache line?) */
      word = evalsim_mem32((dataaddr & ~(config.dc.blocksize - 1)) + (((dataaddr & ~ADDR_C(3)) + idx) & (config.dc.blocksize - 1)), 0);

      dc[set].way[victim].line[((dataaddr + idx) & (config.dc.blocksize - 1)) >> 2] = word;

      if (!cur_area) {
        /* The refill access failed: leave the way invalid. */
        dc[set].way[victim].tagaddr = -1;
        dc[set].way[victim].lru = 0;
        return 0;
      }

      if (cur_area->log)
        fprintf (cur_area->log, "[%"PRIxADDR"] -> read %08"PRIx32"\n", dataaddr,
                 word);
    }

    /* Install the new tag and age the ways. */
    dc[set].way[victim].tagaddr = tagaddr;
    for (idx = 0; idx < config.dc.nways; idx++)
      if (dc[set].way[idx].lru)
        dc[set].way[idx].lru--;
    dc[set].way[victim].lru = config.dc.ustates - 1;
    runtime.sim.mem_cycles += config.dc.load_missdelay;

    hit = victim;
  } else {
    /* Hit: only ways that were more recently used than this one age. */
    dc_stats.readhit++;

    for (idx = 0; idx < config.dc.nways; idx++)
      if (dc[set].way[idx].lru > dc[set].way[hit].lru)
        dc[set].way[idx].lru--;
    dc[set].way[hit].lru = config.dc.ustates - 1;
    runtime.sim.mem_cycles += config.dc.load_hitdelay;
  }

  /* Pick the requested part out of the cached word. */
  word = dc[set].way[hit].line[(dataaddr & (config.dc.blocksize - 1)) >> 2];
  switch (width) {
  case 4:
    return word;
  case 2:
    return (word >> ((dataaddr & 2) ? 0 : 16)) & 0xffff;
  case 1:
    return (word >> (8 * (3 - (dataaddr & 3)))) & 0xff;
  default:
    return 0;
  }
}

/* Simulate a store of 'width' bytes (1, 2 or 4) of 'data' to 'dataaddr'.

   The store is always performed on memory first, through
   setsim_mem32/16/8 (which also receive 'virt_addr').  Nothing else happens
   when the cache is not present (UPR[DCP] clear), not enabled (SR[DCE]
   clear), 'data_ci' is set, or 'cur_area' is NULL after the memory access.

   Otherwise the set and tag are derived from 'dataaddr' and all ways of the
   set are searched:
    - on a hit: increment the DC write hit stats, decrement the 'lru' of
      every way whose 'lru' is above the hit way's, set the hit way's 'lru'
      to config.dc.ustates - 1, charge config.dc.store_hitdelay cycles and
      merge 'data' into the cached word;
    - on a miss: increment the DC write miss stats, pick the way with the
      lowest 'lru' below config.dc.ustates - 1 (way 0 if there is none),
      refill the whole line from memory (which already contains the stored
      data), store the new tag, decrement every non-zero 'lru', set the
      refilled way's 'lru' to config.dc.ustates - 1 and charge
      config.dc.store_missdelay cycles.  If 'cur_area' is NULL after a
      refill access, the way is invalidated (tag -1, 'lru' 0) instead.
*/

void dc_simulate_write(oraddr_t dataaddr, oraddr_t virt_addr, uint32_t data,
                       int width)
{
  int set, way = -1;
  int i;
  oraddr_t tagaddr;
  uint32_t tmp;

  /* Memory is updated unconditionally, before the cache is consulted. */
  if (width == 4)
    setsim_mem32(dataaddr, virt_addr, data);
  else if (width == 2)
    setsim_mem16(dataaddr, virt_addr, data);
  else if (width == 1)
    setsim_mem8(dataaddr, virt_addr, data);

  if (!(cpu_state.sprs[SPR_UPR] & SPR_UPR_DCP) || 
      !(cpu_state.sprs[SPR_SR] & SPR_SR_DCE) ||
      data_ci || !cur_area)
    return;

  /* Which set to check out? */
  set = (dataaddr / config.dc.blocksize) % config.dc.nsets;
  tagaddr = (dataaddr / config.dc.blocksize) / config.dc.nsets;

  /* Scan all ways and try to find a matching way. */
  for (i = 0; i < config.dc.nways; i++)
    if (dc[set].way[i].tagaddr == tagaddr)
      way = i;

  /* Did we find our cached data? */
  if (way >= 0) { /* Yes, we did. */
    dc_stats.writehit++;

    for (i = 0; i < config.dc.nways; i++)
      if (dc[set].way[i].lru > dc[set].way[way].lru)
        dc[set].way[i].lru--;
    dc[set].way[way].lru = config.dc.ustates - 1;
    runtime.sim.mem_cycles += config.dc.store_hitdelay;

    /* Merge the stored value into the cached word.  Lower addresses occupy
       the more significant bits.  The masks are built from unsigned
       constants so that shifting into bit 31 is well defined. */
    tmp = dc[set].way[way].line[(dataaddr & (config.dc.blocksize - 1)) >> 2];
    if (width == 4)
      tmp = data;
    else if (width == 2) {
      /* Keep the halfword that is NOT being written. */
      tmp &= 0xffffu << ((dataaddr & 2) ? 16 : 0);
      tmp |= (data & 0xffff) << ((dataaddr & 2) ? 0 : 16);
    }
    else if (width == 1) {
      tmp &= ~(0xffu << (8 * (3 - (dataaddr & 3))));
      tmp |= (data & 0xff) << (8 * (3 - (dataaddr & 3)));
    }
    dc[set].way[way].line[(dataaddr & (config.dc.blocksize - 1)) >> 2] = tmp;
  }
  else {  /* No, we didn't. */
    int minlru = config.dc.ustates - 1;
    int minway = 0;

    dc_stats.writemiss++;

    /* Find the least recently used way.  The running minimum must be
       updated on every improvement, exactly as dc_simulate_read does;
       otherwise the last way below the initial threshold would be picked
       instead of the one with the lowest 'lru'. */
    for (i = 0; i < config.dc.nways; i++) {
      if (dc[set].way[i].lru < minlru) {
        minway = i;
        minlru = dc[set].way[i].lru;
      }
    }

    /* Refill the line word by word, starting with the word that contains
       'dataaddr' and wrapping around inside the line. */
    for (i = 0; i < (config.dc.blocksize); i += 4) {
      dc[set].way[minway].line[((dataaddr + i) & (config.dc.blocksize - 1)) >> 2] = 
        /* FIXME: Same comment as in dc_simulate_read */
        evalsim_mem32((dataaddr & ~(config.dc.blocksize - 1)) + (((dataaddr & ~3ul)+ i) & (config.dc.blocksize - 1)), 0);
      if(!cur_area) {
        /* The refill access failed: leave the way invalid. */
        dc[set].way[minway].tagaddr = -1;
        dc[set].way[minway].lru = 0;
        return;
      }
    }

    dc[set].way[minway].tagaddr = tagaddr;
    for (i = 0; i < config.dc.nways; i++)
      if (dc[set].way[i].lru)
        dc[set].way[i].lru--;
    dc[set].way[minway].lru = config.dc.ustates - 1;
    runtime.sim.mem_cycles += config.dc.store_missdelay;
  }
}

/* Invalidate the cache block that corresponds to 'dataaddr'.

   Does nothing when the cache is not present (UPR[DCP] clear).  When the
   cache is present but disabled (SR[DCE] clear), every way of the addressed
   set is invalidated.  When it is enabled, only a way whose tag matches is
   invalidated; a miss leaves the cache untouched.  Invalidating a way means
   storing -1 as its tag and resetting its 'lru' to 0.
*/

void dc_inv(oraddr_t dataaddr)
{
  int set;
  int idx;
  oraddr_t tagaddr;

  if (!(cpu_state.sprs[SPR_UPR] & SPR_UPR_DCP))
    return;

  /* Split the address into set index and tag. */
  set = (dataaddr / config.dc.blocksize) % config.dc.nsets;
  tagaddr = (dataaddr / config.dc.blocksize) / config.dc.nsets;

  if (cpu_state.sprs[SPR_SR] & SPR_SR_DCE) {
    /* Walk the ways from the top so that, should several tags match, the
       highest-numbered way is the one that gets invalidated. */
    for (idx = config.dc.nways - 1; idx >= 0; idx--) {
      if (dc[set].way[idx].tagaddr == tagaddr) {
        dc[set].way[idx].tagaddr = -1;
        dc[set].way[idx].lru = 0;
        break;
      }
    }
    return;
  }

  /* Cache disabled: wipe the whole set. */
  for (idx = 0; idx < config.dc.nways; idx++) {
    dc[set].way[idx].tagaddr = -1;
    dc[set].way[idx].lru = 0;
  }
}

/*-----------------------------------------------------[ DC configuration ]---*/
/* Handler for the "enabled" parameter: records the value in
   config.dc.enabled and sets or clears the DCP bit in UPR accordingly.
   'dat' is not used. */
void dc_enabled(union param_val val, void *dat)
{
  config.dc.enabled = val.int_val;

  if (!val.int_val) {
    cpu_state.sprs[SPR_UPR] &= ~SPR_UPR_DCP;
    return;
  }

  cpu_state.sprs[SPR_UPR] |= SPR_UPR_DCP;
}

/* Handler for the "nsets" parameter.  Accepts a positive power of two not
   larger than MAX_DC_SETS, stores it in config.dc.nsets and writes its
   base-2 logarithm, shifted left by 3, into DCCFGR after clearing the NCS
   field; anything else is reported through CONFIG_ERROR.  Non-positive
   values are rejected before is_power2() is consulted, because the set
   count is used as a divisor when addresses are mapped to sets.
   'dat' is not used. */
void dc_nsets(union param_val val, void *dat)
{
  if (val.int_val > 0 && is_power2(val.int_val) && val.int_val <= MAX_DC_SETS){
    config.dc.nsets = val.int_val;
    cpu_state.sprs[SPR_DCCFGR] &= ~SPR_DCCFGR_NCS;
    cpu_state.sprs[SPR_DCCFGR] |= log2_int(val.int_val) << 3;
  }
  else {
    char tmp[200];
    /* Bounded formatting: never writes past the end of 'tmp'. */
    snprintf (tmp, sizeof tmp, "value of power of two and lower or equal than %i expected.", MAX_DC_SETS);
    CONFIG_ERROR(tmp);
  }
}

/* Handler for the "nways" parameter.  Accepts a positive power of two not
   larger than MAX_DC_WAYS, stores it in config.dc.nways and writes its
   base-2 logarithm into DCCFGR after clearing the NCW field; anything else
   is reported through CONFIG_ERROR.  Non-positive values are rejected
   before is_power2() is consulted.  'dat' is not used. */
void dc_nways(union param_val val, void *dat)
{
  if (val.int_val > 0 && is_power2(val.int_val) && val.int_val <= MAX_DC_WAYS){
    config.dc.nways = val.int_val;
    cpu_state.sprs[SPR_DCCFGR] &= ~SPR_DCCFGR_NCW;
    cpu_state.sprs[SPR_DCCFGR] |= log2_int(val.int_val);
  }
  else{
    char tmp[200];
    /* Bounded formatting: never writes past the end of 'tmp'. */
    snprintf (tmp, sizeof tmp, "value of power of two and lower or equal than %i expected.",
    MAX_DC_WAYS);
    CONFIG_ERROR(tmp);
  }
}

void dc_blocksize(union param_val val, void *dat)
{
  if (is_power2(val.int_val)) {
    config.dc.blocksize = val.int_val;
    cpu_state.sprs[SPR_ICCFGR] &= ~SPR_ICCFGR_CBS;
    cpu_state.sprs[SPR_ICCFGR] |= log2_int(val.int_val) << 7;
  } else
    CONFIG_ERROR("value of power of two expected.");
}

/* Handler for the "ustates" parameter: values from 2 to 4 inclusive are
   stored in config.dc.ustates, everything else is reported through
   CONFIG_ERROR.  'dat' is not used. */
void dc_ustates(union param_val val, void *dat)
{
  if (val.int_val < 2 || val.int_val > 4) {
    CONFIG_ERROR("invalid USTATE.");
    return;
  }

  config.dc.ustates = val.int_val;
}

/* Handler for the "load_missdelay" parameter: stores the value that
   dc_simulate_read adds to the memory cycle count on a read miss. */
void dc_load_missdelay(union param_val val, void *dat)
{
  (void) dat;  /* unused */

  config.dc.load_missdelay = val.int_val;
}

/* Handler for the "load_hitdelay" parameter: stores the value that
   dc_simulate_read adds to the memory cycle count on a read hit. */
void dc_load_hitdelay(union param_val val, void *dat)
{
  (void) dat;  /* unused */

  config.dc.load_hitdelay = val.int_val;
}

/* Handler for the "store_missdelay" parameter: stores the value that
   dc_simulate_write adds to the memory cycle count on a write miss. */
void dc_store_missdelay(union param_val val, void *dat)
{
  (void) dat;  /* unused */

  config.dc.store_missdelay = val.int_val;
}

/* Handler for the "store_hitdelay" parameter: stores the value that
   dc_simulate_write adds to the memory cycle count on a write hit. */
void dc_store_hitdelay(union param_val val, void *dat)
{
  (void) dat;  /* unused */

  config.dc.store_hitdelay = val.int_val;
}

/* Register the "dc" configuration section and its integer parameters,
   each with the handler that applies it.  Parameters are registered in the
   order in which they appear in the table. */
void reg_dc_sec(void)
{
  static const struct {
    const char *name;
    void (*handler)(union param_val, void *);
  } dc_params[] = {
    { "enabled",         dc_enabled },
    { "nsets",           dc_nsets },
    { "nways",           dc_nways },
    { "blocksize",       dc_blocksize },
    { "ustates",         dc_ustates },
    { "load_missdelay",  dc_load_missdelay },
    { "load_hitdelay",   dc_load_hitdelay },
    { "store_missdelay", dc_store_missdelay },
    { "store_hitdelay",  dc_store_hitdelay },
  };
  struct config_section *sec = reg_config_sec("dc", NULL, NULL);
  unsigned int k;

  for (k = 0; k < sizeof dc_params / sizeof dc_params[0]; k++)
    reg_config_param(sec, dc_params[k].name, paramt_int, dc_params[k].handler);
}
