/* Copyright (C) 2012 Free Software Foundation, Inc.
   Contributed by Torvald Riegel <triegel@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */

#include "libitm_i.h"

using namespace GTM;

namespace {

// This group consists of all TM methods that synchronize via multiple locks
// (or ownership records).
struct ml_mg : public method_group
{
  static const gtm_word LOCK_BIT = (~(gtm_word)0 >> 1) + 1;
  static const gtm_word INCARNATION_BITS = 3;
  static const gtm_word INCARNATION_MASK = 7;
  // Maximum time is all bits except the lock bit, the overflow reserve bit,
  // and the incarnation bits.
  static const gtm_word TIME_MAX = (~(gtm_word)0 >> (2 + INCARNATION_BITS));
  // The overflow reserve bit is the MSB of the timestamp part of an orec,
  // so we can have TIME_MAX+1 pending timestamp increases before we overflow.
  static const gtm_word OVERFLOW_RESERVE = TIME_MAX + 1;

  static bool is_locked(gtm_word o) { return o & LOCK_BIT; }
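  // To illustrate the orec layout, assume a 64-bit gtm_word: bit 63 is
  // LOCK_BIT, bits 3..62 hold the timestamp (bit 62 being the overflow
  // reserve bit, so regular timestamps fit in bits 3..61 and
  // TIME_MAX == 2^59 - 1), and bits 0..2 hold the incarnation number.
  // A locked orec instead stores the owner in the non-lock bits: shifting
  // the gtm_thread pointer right by one makes room for LOCK_BIT, and
  // because gtm_thread objects are more than 2-byte aligned, the discarded
  // low bit is zero and the value still identifies the owner uniquely.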
  static gtm_word set_locked(gtm_thread *tx)
  {
    return ((uintptr_t)tx >> 1) | LOCK_BIT;
  }
  // Returns a time that includes the lock bit, which is required by both
  // validate() and is_more_recent_or_locked().
  static gtm_word get_time(gtm_word o) { return o >> INCARNATION_BITS; }
  static gtm_word set_time(gtm_word time) { return time << INCARNATION_BITS; }
  static bool is_more_recent_or_locked(gtm_word o, gtm_word than_time)
  {
    // LOCK_BIT is the MSB; thus, if O is locked, it is larger than TIME_MAX.
    return get_time(o) > than_time;
  }
  static bool has_incarnation_left(gtm_word o)
  {
    return (o & INCARNATION_MASK) < INCARNATION_MASK;
  }
  static gtm_word inc_incarnation(gtm_word o) { return o + 1; }
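  // With INCARNATION_BITS == 3, an orec value can thus be bumped up to
  // seven times by rollbacks before rollback() has to fall back to
  // acquiring a fresh timestamp from the global time base.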

  // The shared time base.
  atomic<gtm_word> time __attribute__((aligned(HW_CACHELINE_SIZE)));

  // The array of ownership records.
  atomic<gtm_word>* orecs __attribute__((aligned(HW_CACHELINE_SIZE)));
  char tailpadding[HW_CACHELINE_SIZE - sizeof(atomic<gtm_word>*)];

  // Location-to-orec mapping.  Stripes of 16B mapped to 2^19 orecs.
  static const gtm_word L2O_ORECS = 1 << 19;
  static const gtm_word L2O_SHIFT = 4;
  static size_t get_orec(const void* addr)
  {
    return ((uintptr_t)addr >> L2O_SHIFT) & (L2O_ORECS - 1);
  }
  static size_t get_next_orec(size_t orec)
  {
    return (orec + 1) & (L2O_ORECS - 1);
  }
  // Returns the next orec after the region.
  static size_t get_orec_end(const void* addr, size_t len)
  {
    return (((uintptr_t)addr + len + (1 << L2O_SHIFT) - 1) >> L2O_SHIFT)
        & (L2O_ORECS - 1);
  }
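  // A worked example of the mapping: for addr == 0x1000 and len == 40,
  // get_orec() yields index 0x100 and get_orec_end() yields 0x103, i.e.,
  // the region covers the three 16B stripes at indices 0x100..0x102.
  // Assuming a 64-bit gtm_word, the orec array occupies 2^19 * 8B == 4 MiB.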

  virtual void init()
  {
    // We assume that an atomic<gtm_word> is backed by just a gtm_word, so
    // starting with zeroed memory is fine.
    orecs = (atomic<gtm_word>*) xcalloc(
        sizeof(atomic<gtm_word>) * L2O_ORECS, true);
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.
    time.store(0, memory_order_relaxed);
  }

  virtual void fini()
  {
    free(orecs);
  }

  // We only re-initialize when our time base overflows.  Thus, only reset
  // the time base and the orecs but do not re-allocate the orec array.
  virtual void reinit()
  {
    // This store is only executed while holding the serial lock, so relaxed
    // memory order is sufficient here.  Same holds for the memset.
    time.store(0, memory_order_relaxed);
    memset(orecs, 0, sizeof(atomic<gtm_word>) * L2O_ORECS);
  }
};

static ml_mg o_ml_mg;


// The multiple lock, write-through TM method.
// Maps each memory location to one of the orecs in the orec array, and then
// acquires the associated orec eagerly before writing through.
// Writes require undo-logging because we are dealing with several locks/orecs
// and need to resolve deadlocks if necessary by aborting one of the
// transactions.
// Reads do time-based validation with snapshot time extensions.  Incarnation
// numbers are used to decrease contention on the time base (with those,
// aborted transactions do not need to acquire a new version number for the
// data that has been previously written in the transaction and needs to be
// rolled back).
// gtm_thread::shared_state is used to store a transaction's current
// snapshot time (or commit time). The serial lock uses ~0 for inactive
// transactions and 0 for active ones. Thus, we always have a meaningful
// timestamp in shared_state that can be used to implement quiescence-based
// privatization safety.
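// In outline, a transaction under this method proceeds as follows:
//   begin_or_restart(): read the global time base as the snapshot time.
//   load():  pre_load() checks and records the covering orecs (extending
//            the snapshot if necessary), the data itself is read, and
//            post_load() re-checks the orecs for consistency.
//   store(): pre_write() locks the covering orecs and undo-logs the old
//            data, then the store writes through.
//   trycommit(): increment the time base to obtain a commit time, validate
//            the read log if necessary, and release all write-locked orecs
//            with the new timestamp.
//   rollback(): the caller restores the old data from the undo log; here we
//            release write-locked orecs via incarnation numbers or a fresh
//            timestamp.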
class ml_wt_dispatch : public abi_dispatch
{
protected:
  static void pre_write(gtm_thread *tx, const void *addr, size_t len)
  {
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    gtm_word locked_by_tx = ml_mg::set_locked(tx);

    // Lock all orecs that cover the region.
    size_t orec = ml_mg::get_orec(addr);
    size_t orec_end = ml_mg::get_orec_end(addr, len);
    do
      {
        // Load the orec.  Relaxed memory order is sufficient here because
        // either we have acquired the orec or we will try to acquire it with
        // a CAS with stronger memory order.
        gtm_word o = o_ml_mg.orecs[orec].load(memory_order_relaxed);

        // Check whether we have acquired the orec already.
        if (likely (locked_by_tx != o))
          {
            // If not, acquire.  Make sure that our snapshot time is greater
            // than or equal to the orec's version to avoid masking
            // invalidations of our snapshot with our own writes.
            if (unlikely (ml_mg::is_locked(o)))
              tx->restart(RESTART_LOCKED_WRITE);

            if (unlikely (ml_mg::get_time(o) > snapshot))
              {
                // We only need to extend the snapshot if we have indeed read
                // from this orec before.  Given that we are an update
                // transaction, we will have to extend anyway during commit.
                // ??? Scan the read log instead, aborting if we have read
                // from data covered by this orec before?
                snapshot = extend(tx);
              }

            // We need acquire memory order here to synchronize with other
            // (ownership) releases of the orec.  We do not need acq_rel order
            // because whenever another thread reads from this CAS'
            // modification, then it will abort anyway and does not rely on
            // any further happens-before relation to be established.
            if (unlikely (!o_ml_mg.orecs[orec].compare_exchange_strong(
                o, locked_by_tx, memory_order_acquire)))
              tx->restart(RESTART_LOCKED_WRITE);
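            // Note that restart() does not return: it rolls the transaction
            // back, which also releases any orecs that earlier iterations of
            // this loop acquired and logged in the write log.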

            // We use an explicit fence here to avoid having to use release
            // memory order for all subsequent data stores.  This fence will
            // synchronize with loads of the data with acquire memory order.
            // See post_load() for why this is necessary.
            // Adding release memory order to the prior CAS is not sufficient,
            // at least according to the Batty et al. formalization of the
            // memory model.
            atomic_thread_fence(memory_order_release);

            // We log the previous value here to be able to use incarnation
            // numbers when we have to roll back.
            // ??? Reserve capacity early to avoid capacity checks here?
            gtm_rwlog_entry *e = tx->writelog.push();
            e->orec = o_ml_mg.orecs + orec;
            e->value = o;
          }
        orec = o_ml_mg.get_next_orec(orec);
      }
    while (orec != orec_end);

    // Do undo logging.  We do not know which region prior writes logged
    // (even if orecs have been acquired), so just log everything.
    tx->undolog.log(addr, len);
  }

  static void pre_write(const void *addr, size_t len)
  {
    gtm_thread *tx = gtm_thr();
    pre_write(tx, addr, len);
  }

  // Returns true iff all the orecs in our read log still have the same time
  // or have been locked by the transaction itself.
  static bool validate(gtm_thread *tx)
  {
    gtm_word locked_by_tx = ml_mg::set_locked(tx);
    // ??? This might get called from pre_load() via extend().  In that case,
    // we don't really need to check the new entries that pre_load() is
    // adding.  Stop earlier?
    for (gtm_rwlog_entry *i = tx->readlog.begin(), *ie = tx->readlog.end();
        i != ie; i++)
      {
        // Relaxed memory order is sufficient here because we do not need to
        // establish any new synchronizes-with relationships.  We only need
        // to read a value that is at least as current as enforced by the
        // callers: extend() loads global time with acquire, and trycommit()
        // increments global time with acq_rel.  Therefore, we will see the
        // most recent orec updates before the global time that we load.
        gtm_word o = i->orec->load(memory_order_relaxed);
        // We compare only the time stamp and the lock bit here.  We know that
        // we have read only committed data before, so we can ignore
        // intermediate yet rolled-back updates presented by the incarnation
        // number bits.
        if (ml_mg::get_time(o) != ml_mg::get_time(i->value)
            && o != locked_by_tx)
          return false;
      }
    return true;
  }

  // Tries to extend the snapshot to a more recent time.  Returns the new
  // snapshot time and updates TX->SHARED_STATE.  If the snapshot cannot be
  // extended to the current global time, TX is restarted.
  static gtm_word extend(gtm_thread *tx)
  {
    // We read global time here, even if this isn't strictly necessary
    // because we could just return the maximum of the timestamps that
    // validate sees.  However, the potential cache miss on global time is
    // probably a reasonable price to pay for avoiding unnecessary extensions
    // in the future.
    // We need acquire memory order because we have to synchronize with the
    // increment of global time by update transactions, whose lock
    // acquisitions we have to observe (also see trycommit()).
    gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
    if (!validate(tx))
      tx->restart(RESTART_VALIDATE_READ);

    // Update our public snapshot time.  Probably useful to decrease waiting
    // due to quiescence-based privatization safety.
    // Use release memory order to establish synchronizes-with with the
    // privatizers; prior data loads should happen before the privatizers
    // potentially modify anything.
    tx->shared_state.store(snapshot, memory_order_release);
    return snapshot;
  }

  // First pass over orecs.  Load and check all orecs that cover the region.
  // Write to read log, extend snapshot time if necessary.
  static gtm_rwlog_entry* pre_load(gtm_thread *tx, const void* addr,
      size_t len)
  {
    // Don't obtain an iterator yet because the log might get resized.
    size_t log_start = tx->readlog.size();
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    gtm_word locked_by_tx = ml_mg::set_locked(tx);

    size_t orec = ml_mg::get_orec(addr);
    size_t orec_end = ml_mg::get_orec_end(addr, len);
    do
      {
        // We need acquire memory order here so that this load will
        // synchronize with the store that releases the orec in trycommit().
        // In turn, this makes sure that subsequent data loads will read from
        // a visible sequence of side effects that starts with the most recent
        // store to the data right before the release of the orec.
        gtm_word o = o_ml_mg.orecs[orec].load(memory_order_acquire);

        if (likely (!ml_mg::is_more_recent_or_locked(o, snapshot)))
          {
            success:
            gtm_rwlog_entry *e = tx->readlog.push();
            e->orec = o_ml_mg.orecs + orec;
            e->value = o;
          }
        else if (!ml_mg::is_locked(o))
          {
            // We cannot read this part of the region because it has been
            // updated more recently than our snapshot time.  If we can extend
            // our snapshot, then we can read.
            snapshot = extend(tx);
            goto success;
          }
        else
          {
            // If the orec is locked by us, skip it because we can just
            // read from it.  Otherwise, restart the transaction.
            if (o != locked_by_tx)
              tx->restart(RESTART_LOCKED_READ);
          }
        orec = o_ml_mg.get_next_orec(orec);
      }
    while (orec != orec_end);
    return &tx->readlog[log_start];
  }

  // Second pass over orecs, verifying that we had a consistent read.
  // Restart the transaction if any of the orecs is locked by another
  // transaction.
  static void post_load(gtm_thread *tx, gtm_rwlog_entry* log)
  {
    for (gtm_rwlog_entry *end = tx->readlog.end(); log != end; log++)
      {
        // Check that the snapshot is consistent.  We expect the previous data
        // load to have acquire memory order, or be atomic and followed by an
        // acquire fence.
        // As a result, the data load will synchronize with the release fence
        // issued by the transactions whose data updates the data load has read
        // from.  This forces the orec load to read from a visible sequence of
        // side effects that starts with the other updating transaction's
        // store that acquired the orec and set it to locked.
        // We therefore either read a value with the locked bit set (and
        // restart) or read an orec value that was written after the data had
        // been written.  Either will allow us to detect inconsistent reads
        // because it will have a higher/different value.
        // Also note that, unlike in validate(), we compare the raw value
        // of the orec here, including incarnation numbers.  We must prevent
        // returning uncommitted data from loads (whereas when validating, we
        // already performed a consistent load).
        gtm_word o = log->orec->load(memory_order_relaxed);
        if (log->value != o)
          tx->restart(RESTART_VALIDATE_READ);
      }
  }

  template <typename V> static V load(const V* addr, ls_modifier mod)
  {
    // Read-for-write should be unlikely, but we need to handle it or we will
    // break later WaW optimizations.
    if (unlikely(mod == RfW))
      {
        pre_write(addr, sizeof(V));
        return *addr;
      }
    if (unlikely(mod == RaW))
      return *addr;
    // ??? Optimize for RaR?

    gtm_thread *tx = gtm_thr();
    gtm_rwlog_entry* log = pre_load(tx, addr, sizeof(V));

    // Load the data.
    // This needs to have acquire memory order (see post_load()).
    // Alternatively, we can put an acquire fence after the data load but this
    // is probably less efficient.
    // FIXME We would need an atomic load with acquire memory order here but
    // we can't just forge an atomic load for nonatomic data because this
    // might not work on all implementations of atomics.  However, we need
    // the acquire memory order and we can only establish this if we link
    // it to the matching release using a reads-from relation between atomic
    // loads.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the load would be moved to
    // after the fence, we potentially don't synchronize properly anymore).
    // Instead of the following, just use an ordinary load followed by an
    // acquire fence, and hope that this is good enough for now:
    // V v = atomic_load_explicit((atomic<V>*)addr, memory_order_acquire);
    V v = *addr;
    atomic_thread_fence(memory_order_acquire);
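    // If the data load above read from another transaction's store, the
    // release fence in pre_write() synchronizes with this acquire fence,
    // so post_load() is guaranteed to see at least the locked orec value.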

    // ??? Retry the whole load if it wasn't consistent?
    post_load(tx, log);

    return v;
  }

  template <typename V> static void store(V* addr, const V value,
      ls_modifier mod)
  {
    if (likely(mod != WaW))
      pre_write(addr, sizeof(V));
    // FIXME We would need an atomic store here but we can't just forge an
    // atomic store for nonatomic data because this might not work on all
    // implementations of atomics.  However, we need this store to link the
    // release fence in pre_write() to the acquire operation in load, which
    // is only guaranteed if we have a reads-from relation between atomic
    // accesses.  Also, the compiler is allowed to optimize nonatomic accesses
    // differently than atomic accesses (e.g., if the store would be moved
    // to before the release fence in pre_write(), things could go wrong).
    // atomic_store_explicit((atomic<V>*)addr, value, memory_order_relaxed);
    *addr = value;
  }

public:
  static void memtransfer_static(void *dst, const void* src, size_t size,
      bool may_overlap, ls_modifier dst_mod, ls_modifier src_mod)
  {
    gtm_rwlog_entry* log = 0;
    gtm_thread *tx = 0;

    if (src_mod == RfW)
      {
        tx = gtm_thr();
        pre_write(tx, src, size);
      }
    else if (src_mod != RaW && src_mod != NONTXNAL)
      {
        tx = gtm_thr();
        log = pre_load(tx, src, size);
      }
    // ??? Optimize for RaR?
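    // At this point, TX has been set unless src_mod is RaW or NONTXNAL; the
    // condition below fetches it in exactly those remaining cases, so the
    // pre_write() call always receives a valid TX.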

    if (dst_mod != NONTXNAL && dst_mod != WaW)
      {
        if (src_mod != RfW && (src_mod == RaW || src_mod == NONTXNAL))
          tx = gtm_thr();
        pre_write(tx, dst, size);
      }

    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memcpy/memmove are good enough.
    if (!may_overlap)
      ::memcpy(dst, src, size);
    else
      ::memmove(dst, src, size);

    // ??? Retry the whole memtransfer if it wasn't consistent?
    if (src_mod != RfW && src_mod != RaW && src_mod != NONTXNAL)
      {
        // See load() for why we need the acquire fence here.
        atomic_thread_fence(memory_order_acquire);
        post_load(tx, log);
      }
  }

  static void memset_static(void *dst, int c, size_t size, ls_modifier mod)
  {
    if (mod != WaW)
      pre_write(dst, size);
    // FIXME We should use atomics here (see store()).  Let's just hope that
    // memset is good enough.
    ::memset(dst, c, size);
  }

  virtual gtm_restart_reason begin_or_restart()
  {
    // We don't need to do anything for nested transactions.
    gtm_thread *tx = gtm_thr();
    if (tx->parent_txns.size() > 0)
      return NO_RESTART;

    // Read the current time, which becomes our snapshot time.
    // Use acquire memory order so that we see the lock acquisitions by update
    // transactions that incremented the global time (see trycommit()).
    gtm_word snapshot = o_ml_mg.time.load(memory_order_acquire);
    // Re-initialize method group on time overflow.
    if (snapshot >= o_ml_mg.TIME_MAX)
      return RESTART_INIT_METHOD_GROUP;

    // We don't need to enforce any ordering for the following store. There
    // are no earlier data loads in this transaction, so the store cannot
    // become visible before those (which could lead to the violation of
    // privatization safety). The store can become visible after later loads
    // but this does not matter because the previous value will have been
    // smaller or equal (the serial lock will set shared_state to zero when
    // marking the transaction as active, and restarts enforce immediate
    // visibility of a smaller or equal value with a barrier (see
    // rollback()).
    tx->shared_state.store(snapshot, memory_order_relaxed);
    return NO_RESTART;
  }

  virtual bool trycommit(gtm_word& priv_time)
  {
    gtm_thread* tx = gtm_thr();

    // If we haven't updated anything, we can commit.
    if (!tx->writelog.size())
      {
        tx->readlog.clear();
        return true;
      }

    // Get a commit time.
    // Overflow of o_ml_mg.time is prevented in begin_or_restart().
    // We need acq_rel here because (1) the acquire part is required for our
    // own subsequent call to validate(), and (2) the release part is
    // necessary to make other threads' validate() work as explained there
    // and in extend().
    gtm_word ct = o_ml_mg.time.fetch_add(1, memory_order_acq_rel) + 1;
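    // fetch_add() returns the previous value, so ct is a commit time that
    // no other transaction can share.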

    // Extend our snapshot time to at least our commit time.
    // Note that we do not need to validate if our snapshot time is right
    // before the commit time because we are never sharing the same commit
    // time with other transactions.
    // No need to reset shared_state, which will be modified by the serial
    // lock right after our commit anyway.
    gtm_word snapshot = tx->shared_state.load(memory_order_relaxed);
    if (snapshot < ct - 1 && !validate(tx))
      return false;

    // Release orecs.
    // See pre_load() / post_load() for why we need release memory order.
    // ??? Can we use a release fence and relaxed stores?
    gtm_word v = ml_mg::set_time(ct);
    for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
        i != ie; i++)
      i->orec->store(v, memory_order_release);

    // We're done, clear the logs.
    tx->writelog.clear();
    tx->readlog.clear();

    // Need to ensure privatization safety. Every other transaction must
    // have a snapshot time that is at least as high as our commit time
    // (i.e., our commit must be visible to them).
    priv_time = ct;
    return true;
  }

  virtual void rollback(gtm_transaction_cp *cp)
  {
    // We don't do anything for rollbacks of nested transactions.
    // ??? We could release locks here if we snapshot writelog size.  readlog
    // is similar.  This is just a performance optimization though.  Nested
    // aborts should be rather infrequent, so the additional save/restore
    // overhead for the checkpoints could be higher.
    if (cp != 0)
      return;

    gtm_thread *tx = gtm_thr();
    gtm_word overflow_value = 0;

    // Release orecs.
    for (gtm_rwlog_entry *i = tx->writelog.begin(), *ie = tx->writelog.end();
        i != ie; i++)
      {
        // If possible, just increase the incarnation number.
        // See pre_load() / post_load() for why we need release memory order.
        // ??? Can we use a release fence and relaxed stores?  (Same below.)
        if (ml_mg::has_incarnation_left(i->value))
          i->orec->store(ml_mg::inc_incarnation(i->value),
              memory_order_release);
        else
          {
            // We have an incarnation overflow.  Acquire a new timestamp, and
            // use it from now on as value for each orec whose incarnation
            // number cannot be increased.
            // Overflow of o_ml_mg.time is prevented in begin_or_restart().
            // See pre_load() / post_load() for why we need release memory
            // order.
            if (!overflow_value)
              // Release memory order is both sufficient and required here.
              // In contrast to the increment in trycommit(), we need release
              // for the same reason but do not need the acquire because we
              // do not validate subsequently.
              overflow_value = ml_mg::set_time(
                  o_ml_mg.time.fetch_add(1, memory_order_release) + 1);
            i->orec->store(overflow_value, memory_order_release);
          }
      }

    // We need this release fence to ensure that privatizers see the
    // rolled-back original state (not any uncommitted values) when they read
    // the new snapshot time that we write in begin_or_restart().
    atomic_thread_fence(memory_order_release);

    // We're done, clear the logs.
    tx->writelog.clear();
    tx->readlog.clear();
  }

  virtual bool supports(unsigned number_of_threads)
  {
    // Each txn can commit and fail and rollback once before checking for
    // overflow, so this bounds the number of threads that we can support.
    // In practice, this won't be a problem but we check it anyway so that
    // we never break in the occasional weird situation.
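    // As a concrete bound: with a 64-bit gtm_word, OVERFLOW_RESERVE is 2^59,
    // so this check cannot realistically fail; it matters only for very
    // narrow gtm_word types.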
    return (number_of_threads * 2 <= ml_mg::OVERFLOW_RESERVE);
  }

  CREATE_DISPATCH_METHODS(virtual, )
  CREATE_DISPATCH_METHODS_MEM()

  ml_wt_dispatch() : abi_dispatch(false, true, false, false, &o_ml_mg)
  { }
};

} // anon namespace

static const ml_wt_dispatch o_ml_wt_dispatch;

abi_dispatch *
GTM::dispatch_ml_wt ()
{
  return const_cast<ml_wt_dispatch *>(&o_ml_wt_dispatch);
}