OpenCores
URL https://opencores.org/ocsvn/openrisc/openrisc/trunk

Subversion Repositories openrisc

[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [testsuite/] [go.test/] [test/] [bench/] [shootout/] [timing.log] - Rev 700

Compare with Previous | Blame | View Log

All tests on r45 or r70

Aug 3 2009

First version of fasta. Translation of fasta.c, fetched from
        http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4

fasta -n 25000000
        gcc -O2 fasta.c 5.98u 0.00s 6.01r
        gccgo -O2 fasta.go      8.82u 0.02s 8.85r
        6g fasta.go     13.50u 0.02s 13.53r
        6g -B fasta.go  12.99u 0.02s 13.02r

Aug 4 2009
[added timing.sh]

# myrandom:
#   hand-written optimization of integer division
#   use int32->float conversion
fasta -n 25000000
        # probably I/O library inefficiencies
        gcc -O2 fasta.c 5.99u 0.00s 6.00r 
        gccgo -O2 fasta.go      8.82u 0.02s 8.85r
        gc fasta        10.70u 0.00s 10.77r
        gc_B fasta      10.09u 0.03s 10.12r

reverse-complement < output-of-fasta-25000000
        # we don't know - memory cache behavior?
        gcc -O2 reverse-complement.c    2.04u 0.94s 10.54r
        gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r
        gc reverse-complement   6.55u 0.70s 7.26r
        gc_B reverse-complement 6.32u 0.70s 7.10r

nbody 50000000
        # math.Sqrt needs to be in assembly; inlining is probably the other 50%
        gcc -O2 nbody.c 21.61u 0.01s 24.80r
        gccgo -O2 nbody.go      118.55u 0.02s 120.32r
        gc nbody        100.84u 0.00s 100.85r
        gc_B nbody      103.33u 0.00s 103.39r
[
hacked Sqrt in assembler
        gc nbody        31.97u 0.00s 32.01r
]

binary-tree 15 # too slow to use 20
        # memory allocation and garbage collection
        gcc -O2 binary-tree.c -lm       0.86u 0.00s 0.87r
        gccgo -O2 binary-tree.go        1.69u 0.46s 2.15r
        gccgo -O2 binary-tree-freelist.go       8.48u 0.00s 8.48r
        gc binary-tree  9.60u 0.01s 9.62r
        gc binary-tree-freelist 0.48u 0.01s 0.50r

August 5, 2009

fannkuch 12
        # bounds checking is half the difference
        # rest might be registerization
        gcc -O2 fannkuch.c      60.09u 0.01s 60.32r
        gccgo -O2 fannkuch.go   64.89u 0.00s 64.92r
        gc fannkuch     124.59u 0.00s 124.67r
        gc_B fannkuch   91.14u 0.00s 91.16r

regex-dna 100000
        # regexp code is slow on trivial regexp
        gcc -O2 regex-dna.c -lpcre      0.92u 0.00s 0.99r
        gc regex-dna    26.94u 0.18s 28.75r
        gc_B regex-dna  26.51u 0.09s 26.75r

spectral-norm 5500
        gcc -O2 spectral-norm.c -lm     11.54u 0.00s 11.55r
        gccgo -O2 spectral-norm.go      12.20u 0.00s 12.23r
        gc spectral-norm        50.23u 0.00s 50.36r
        gc_B spectral-norm      49.69u 0.01s 49.83r
        gc spectral-norm-parallel       24.47u 0.03s 11.05r  # has shift >>1 not div /2
        [using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]

August 6, 2009

k-nucleotide 5000000
        # string maps are slower than glib string maps
        gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0   10.72u 0.01s 10.74r
        gccgo -O2 k-nucleotide.go       21.64u 0.83s 22.78r
        gc k-nucleotide 16.08u 0.06s 16.50r
        gc_B k-nucleotide       17.32u 0.02s 17.37r

mandelbrot 5500
        # floating point code generator should use more registers
        gcc -O2 mandelbrot.c    56.13u 0.02s 56.17r
        gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r
        gc mandelbrot   74.32u 0.00s 74.35r
        gc_B mandelbrot 74.28u 0.01s 74.31r

meteor 2100
        # we don't know
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.12u 0.00s 0.14r
        gc meteor-contest       0.24u 0.00s 0.26r
        gc_B meteor-contest     0.23u 0.00s 0.24r

pidigits 10000
        # bignum is slower than gmp
        gcc -O2 pidigits.c -lgmp        2.60u 0.00s 2.62r
        gc pidigits     77.69u 0.14s 78.18r
        gc_B pidigits   74.26u 0.18s 75.41r
        gc_B pidigits   68.48u 0.20s 69.31r   # special case: no bounds checking in bignum

August 7 2009

# New gc does better division by powers of 2.  Significant improvements:

spectral-norm 5500
        # floating point code generator should use more registers; possibly inline evalA
        gcc -O2 spectral-norm.c -lm     11.50u 0.00s 11.50r
        gccgo -O2 spectral-norm.go      12.02u 0.00s 12.02r
        gc spectral-norm        23.98u 0.00s 24.00r     # new time is 0.48 times old time, 52% faster
        gc_B spectral-norm      23.71u 0.01s 23.72r     # ditto
        gc spectral-norm-parallel       24.04u 0.00s 6.26r  # /2 put back.  note: 4x faster (on r70, idle)

k-nucleotide 1000000
        # string maps are slower than glib string maps
        gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0   10.82u 0.04s 10.87r
        gccgo -O2 k-nucleotide.go       22.73u 0.89s 23.63r
        gc k-nucleotide 15.97u 0.03s 16.04r
        gc_B k-nucleotide       15.86u 0.06s 15.93r     # 8.5% faster, but probably due to weird cache effects in previous version

pidigits 10000
        # bignum is slower than gmp
        gcc -O2 pidigits.c -lgmp        2.58u 0.00s 2.58r
        gc pidigits     71.24u 0.04s 71.28r     # 8.5% faster
        gc_B pidigits   71.25u 0.03s 71.29r     # 4% faster

threadring 50000000
        gcc -O2 threadring.c -lpthread  35.51u 160.21s 199.50r
        gccgo -O2 threadring.go 90.33u 459.95s 448.03r
        gc threadring   33.11u 0.00s 33.14r
        GOMAXPROCS=4 gc threadring      114.48u 226.65s 371.59r
        # change wait code to do <-make(chan int) instead of time.Sleep
        gc threadring   28.41u 0.01s 29.35r
        GOMAXPROCS=4 gc threadring      112.59u 232.83s 384.72r
        
chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      18.14u 276.52s 76.93r
        gc chameneosredux       20.19u 0.01s 20.23r

Aug 10 2009

# new 6g with better fp registers, fast div and mod of integers
# complete set of timings listed. significant changes marked ***

fasta -n 25000000
        # probably I/O library inefficiencies
        gcc -O2 fasta.c 5.96u 0.00s 5.97r
        gc fasta        10.59u 0.01s 10.61r
        gc_B fasta      9.92u 0.02s 9.95r

reverse-complement < output-of-fasta-25000000
        # we don't know - memory cache behavior?
        gcc -O2 reverse-complement.c    1.96u 1.56s 16.23r
        gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r
        gc reverse-complement   6.46u 0.70s 7.17r
        gc_B reverse-complement 6.22u 0.72s 6.95r

nbody 50000000
        # math.Sqrt needs to be in assembly; inlining is probably the other 50%
        gcc -O2 nbody.c 21.26u 0.01s 21.28r
        gccgo -O2 nbody.go      116.68u 0.07s 116.80r
        gc nbody        86.64u 0.01s 86.68r     # -14%
        gc_B nbody      85.72u 0.02s 85.77r     # *** -17%

binary-tree 15 # too slow to use 20
        # memory allocation and garbage collection
        gcc -O2 binary-tree.c -lm       0.87u 0.00s 0.87r
        gccgo -O2 binary-tree.go        1.61u 0.47s 2.09r
        gccgo -O2 binary-tree-freelist.go       0.00u 0.00s 0.01r
        gc binary-tree  9.11u 0.01s 9.13r       # *** -5%
        gc binary-tree-freelist 0.47u 0.01s 0.48r

fannkuch 12
        # bounds checking is half the difference
        # rest might be registerization
        gcc -O2 fannkuch.c      59.92u 0.00s 59.94r
        gccgo -O2 fannkuch.go   65.54u 0.00s 65.58r
        gc fannkuch     123.98u 0.01s 124.04r
        gc_B fannkuch   90.75u 0.00s 90.78r

regex-dna 100000
        # regexp code is slow on trivial regexp
        gcc -O2 regex-dna.c -lpcre      0.91u 0.00s 0.92r
        gc regex-dna    27.25u 0.02s 27.28r
        gc_B regex-dna  29.51u 0.03s 29.55r

spectral-norm 5500
        # possibly inline evalA
        gcc -O2 spectral-norm.c -lm     11.57u 0.00s 11.57r
        gccgo -O2 spectral-norm.go      12.07u 0.01s 12.08r
        gc spectral-norm        23.99u 0.00s 24.00r
        gc_B spectral-norm      23.73u 0.00s 23.75r

k-nucleotide 1000000
        # string maps are slower than glib string maps
        gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0   10.63u 0.02s 10.69r
        gccgo -O2 k-nucleotide.go       23.19u 0.91s 24.12r
        gc k-nucleotide 16.73u 0.04s 16.78r     # *** +5% (but this one seems to vary by more than that)
        gc_B k-nucleotide       16.46u 0.04s 16.51r     # *** +5%

mandelbrot 16000
        gcc -O2 mandelbrot.c    56.16u 0.00s 56.16r
        gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r
        gc mandelbrot   64.05u 0.02s 64.08r     # *** -14%
        gc_B mandelbrot 64.10u 0.02s 64.14r     # *** -14%

meteor 2100
        # we don't know
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.12u 0.00s 0.12r
        gc meteor-contest       0.18u 0.00s 0.20r       # *** -25%
        gc_B meteor-contest     0.17u 0.00s 0.18r       # *** -24%

pidigits 10000
        # bignum is slower than gmp
        gcc -O2 pidigits.c -lgmp        2.57u 0.00s 2.57r
        gc pidigits     71.82u 0.04s 71.89r
        gc_B pidigits   71.84u 0.08s 71.98r

threadring 50000000
        gcc -O2 threadring.c -lpthread  30.91u 164.33s 204.57r
        gccgo -O2 threadring.go 87.12u 460.04s 447.61r
        gc threadring   38.55u 0.00s 38.56r     # *** +16%

chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      17.93u 323.65s 88.47r
        gc chameneosredux       21.72u 0.00s 21.73r

August 10 2009

# In-place versions for some bignum operations.
pidigits 10000
        gcc -O2 pidigits.c -lgmp        2.56u 0.00s 2.57r
        gc pidigits     55.22u 0.04s 55.29r     # *** -23%
        gc_B pidigits   55.49u 0.02s 55.60r     # *** -23%

September 3 2009

# New 6g inlines slices, has a few other tweaks.
# Complete rerun. Significant changes marked.

fasta -n 25000000
        # probably I/O library inefficiencies
        gcc -O2 fasta.c 5.96u 0.00s 5.96r
        gc fasta        10.63u 0.02s 10.66r
        gc_B fasta      9.92u 0.01s 9.94r

reverse-complement < output-of-fasta-25000000
        # we don't know - memory cache behavior?
        gcc -O2 reverse-complement.c    1.92u 0.33s 2.93r
        gccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r       # +5%
        gc reverse-complement   6.59u 0.70s 7.29r       # +2%
        gc_B reverse-complement 5.57u 0.80s 6.37r       # -10%

nbody 50000000
        # math.Sqrt needs to be in assembly; inlining is probably the other 50%
        # also loop alignment appears to be critical
        gcc -O2 nbody.c 21.28u 0.00s 21.28r
        gccgo -O2 nbody.go      119.21u 0.00s 119.22r   # +2%
        gc nbody        109.72u 0.00s 109.78r   # + 28% *****
        gc_B nbody      85.90u 0.00s 85.91r

binary-tree 15 # too slow to use 20
        # memory allocation and garbage collection
        gcc -O2 binary-tree.c -lm       0.86u 0.00s 0.87r
        gccgo -O2 binary-tree.go        1.88u 0.54s 2.42r       # +17%
        gccgo -O2 binary-tree-freelist.go       0.01u 0.01s 0.02r
        gc binary-tree  8.94u 0.01s 8.96r       # -2%
        gc binary-tree-freelist 0.47u 0.01s 0.48r

fannkuch 12
        # bounds checking is half the difference
        # rest might be registerization
        gcc -O2 fannkuch.c      60.12u 0.00s 60.12r
        gccgo -O2 fannkuch.go   92.62u 0.00s 92.66r             # +41% ***
        gc fannkuch     123.90u 0.00s 123.92r
        gc_B fannkuch   89.71u 0.00s 89.74r     # -1%

regex-dna 100000
        # regexp code is slow on trivial regexp
        gcc -O2 regex-dna.c -lpcre      0.88u 0.00s 0.88r
        gc regex-dna    25.77u 0.01s 25.79r             # -5%
        gc_B regex-dna  26.05u 0.02s 26.09r     # -12% ***

spectral-norm 5500
        # possibly inline evalA
        gcc -O2 spectral-norm.c -lm     11.51u 0.00s 11.51r
        gccgo -O2 spectral-norm.go      11.95u 0.00s 11.96r
        gc spectral-norm        24.23u 0.00s 24.23r
        gc_B spectral-norm      23.83u 0.00s 23.84r

k-nucleotide 1000000
        # string maps are slower than glib string maps
        gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0   10.68u 0.04s 10.72r
        gccgo -O2 k-nucleotide.go       23.03u 0.88s 23.92r
        gc k-nucleotide 15.79u 0.05s 15.85r     # -5% (but this one seems to vary by more than that)
        gc_B k-nucleotide       17.88u 0.05s 17.95r # +8% (ditto)

mandelbrot 16000
        gcc -O2 mandelbrot.c    56.17u 0.02s 56.20r
        gccgo -O2 mandelbrot.go 56.74u 0.02s 56.79r      # -1%
        gc mandelbrot   63.31u 0.01s 63.35r     # -1%
        gc_B mandelbrot 63.29u 0.00s 63.31r     # -1%

meteor 2100
        # we don't know
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.11u 0.00s 0.12r
        gc meteor-contest       0.18u 0.00s 0.19r
        gc_B meteor-contest     0.17u 0.00s 0.18r

pidigits 10000
        # bignum is slower than gmp
        gcc -O2 pidigits.c -lgmp        2.56u 0.00s 2.57r
        gc pidigits     55.87u 0.03s 55.91r
        gc_B pidigits   55.93u 0.03s 55.99r

# these tests are compared using real time, since they run on multiple processors
# accuracy probably low
threadring 50000000
        gcc -O2 threadring.c -lpthread  26.31u 164.69s 199.92r  # -2%
        gccgo -O2 threadring.go 87.90u 487.26s 472.81r  # +6%
        gc threadring   28.89u 0.00s 28.90r     # -25% ***

chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      16.41u 296.91s 81.17r   # -8%
        gc chameneosredux       19.97u 0.00s 19.97r     # -8%

Sep 22, 2009

# 6g inlines sliceslice in most cases.

fasta -n 25000000
        # probably I/O library inefficiencies
        gc fasta        10.24u 0.00s 10.25r     # -4%
        gc_B fasta      9.68u 0.01s 9.69r       # -3%

reverse-complement < output-of-fasta-25000000
        # we don't know - memory cache behavior?
        gc reverse-complement   6.67u 0.69s 7.37r       # +1%
        gc_B reverse-complement 6.00u 0.64s 6.65r       # +7%

nbody -n 50000000
        # math.Sqrt needs to be in assembly; inlining is probably the other 50%
        # also loop alignment appears to be critical
        gc nbody        86.27u 0.00s 86.29r     # -21%
        gc_B nbody      104.52u 0.00s 104.54r   # +22%

fannkuch 12
        # bounds checking is half the difference
        # rest might be registerization
        gc fannkuch     128.36u 0.00s 128.37r   # +4%
        gc_B fannkuch   89.32u 0.00s 89.34r

regex-dna 100000
        # regexp code is slow on trivial regexp
        gc regex-dna    24.82u 0.01s 24.86r     # -4%
        gc_B regex-dna  24.55u 0.01s 24.57r     # -6%

spectral-norm 5500
        # possibly inline evalA
        gc spectral-norm        24.05u 0.00s 24.07r     # -1%
        gc_B spectral-norm      23.60u 0.00s 23.65r      # -1%

k-nucleotide 1000000
        # string maps are slower than glib string maps
        gc k-nucleotide 17.84u 0.04s 17.89r     # +13% but mysterious variation continues
        gc_B k-nucleotide       15.56u 0.08s 15.65r     # -13% (ditto)

mandelbrot 16000
        gc mandelbrot   64.08u 0.01s 64.11r     # +1%
        gc_B mandelbrot 64.04u 0.00s 64.05r     # +1%

pidigits 10000
        # bignum is slower than gmp
        gc pidigits     58.68u 0.02s 58.72r     # +5%
        gc_B pidigits   58.86u 0.05s 58.99r     # +5%

# these tests are compared using real time, since they run on multiple processors
# accuracy probably low
threadring 50000000
        gc threadring   32.70u 0.02s 32.77r     # +13%

chameneos 6000000
        gc chameneosredux       26.62u 0.00s 26.63r     # +13%

Sep 24, 2009

# Sqrt now in assembler for 6g.
nbody -n 50000000
        # remember, at least for 6g, alignment of loops may be important
        gcc -O2 nbody.c 21.24u 0.00s 21.25r
        gccgo -O2 nbody.go      121.03u 0.00s 121.04r
        gc nbody        30.26u 0.00s 30.27r     # -65% ***
        gc_B nbody      30.20u 0.02s 30.22r     # -72% *** 

Nov 13 2009

# fix bug in regexp; take performance hit.  good regexps will come in time.
regex-dna 100000
        gcc -O2 regex-dna.c -lpcre      0.92u 0.00s 0.94r
        gc regex-dna    29.78u 0.03s 29.83r
        gc_B regex-dna  32.63u 0.03s 32.74r

Nov 24 2009

# Roger Peppe's rewrite of the benchmark
chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      18.00u 303.29s 83.64r
        gc chameneosredux       12.10u 0.00s 12.10r  # 2.22X faster

Jan 6, 2010

# Long-overdue update.  All numbers included in this complete run.
# Some programs (e.g. reverse-complement) rewritten for speed.
# Regular expressions much faster in common cases (although still far behind PCRE)
# Bignum stuff improved
# Better (but sometimes slower) locking in channels.

fasta -n 25000000
        gcc -O2 fasta.c 5.99u 0.01s 6.00r
        gc fasta        9.11u 0.00s 9.12r       # -11%
        gc_B fasta      8.60u 0.00s 8.62r       # -11%

reverse-complement < output-of-fasta-25000000
        gcc -O2 reverse-complement.c    2.00u 0.80s 9.54r
#       gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r       # 33% faster
        gc reverse-complement   2.01u 0.38s 2.40r       # 3.3X faster
        gc_B reverse-complement 1.88u 0.36s 2.24r       # 3.2X faster
GOGC=off
        gc reverse-complement   2.01u 0.35s 2.37r
        gc_B reverse-complement 1.86u 0.32s 2.19r

nbody -n 50000000
        gcc -O2 nbody.c 21.28u 0.00s 21.31r
        gccgo -O2 nbody.go      80.02u 0.00s 80.05r     # 33% faster
        gc nbody        30.13u 0.00s 30.13r
        gc_B nbody      29.89u 0.01s 29.91r

binary-tree 15 # too slow to use 20
        gcc -O2 binary-tree.c -lm       0.86u 0.00s 0.87r
        gccgo -O2 binary-tree.go        4.82u 0.41s 5.24r       # 2.5X slower
        gc binary-tree  7.23u 0.01s 7.25r       # -19%
        gc binary-tree-freelist 0.43u 0.00s 0.44r       # -9%

fannkuch 12
        gcc -O2 fannkuch.c      60.17u 0.00s 60.17r
        gccgo -O2 fannkuch.go   78.47u 0.01s 78.49r
        gc fannkuch     128.86u 0.00s 128.96r
        gc_B fannkuch   90.17u 0.00s 90.21r

regex-dna 100000
        gcc -O2 regex-dna.c -lpcre      0.90u 0.00s 0.92r
        gc regex-dna    9.48u 0.01s 9.50r       # 3.1X faster
        gc_B regex-dna  9.08u 0.00s 9.10r       # 3.6X faster

spectral-norm 5500
        gcc -O2 spectral-norm.c -lm     11.48u 0.00s 11.48r
        gccgo -O2 spectral-norm.go      11.68u 0.00s 11.70r
        gc spectral-norm        23.98u 0.00s 23.99r
        gc_B spectral-norm      23.68u 0.00s 23.69r

k-nucleotide 1000000
        gcc -O2 k-nucleotide.c  10.85u 0.04s 10.90r
        gccgo -O2 k-nucleotide.go       25.26u 0.87s 26.14r
        gc k-nucleotide 15.28u 0.06s 15.37r     # restored; mysterious variation continues
        gc_B k-nucleotide       15.97u 0.03s 16.00r

mandelbrot 16000
        gcc -O2 mandelbrot.c    56.12u 0.01s 56.15r
        gccgo -O2 mandelbrot.go 56.86u 0.01s 56.89r
        gc mandelbrot   66.05u 0.00s 66.07r     # -3%
        gc_B mandelbrot 66.06u 0.00s 66.07r     # -3%

meteor 2100
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.12u 0.00s 0.12r
        gc meteor-contest       0.17u 0.00s 0.17r
        gc_B meteor-contest     0.15u 0.00s 0.16r

pidigits 10000
        gcc -O2 pidigits.c -lgmp        2.57u 0.00s 2.59r
        gc pidigits     38.27u 0.02s 38.30r     # 1.5X faster
        gc_B pidigits   38.27u 0.02s 38.31r     # 1.5X faster

threadring 50000000
        gcc -O2 threadring.c    37.11u 170.59s 212.75r
        gccgo -O2 threadring.go 89.67u 447.56s 442.55r  # -6.5%
        gc threadring   36.08u 0.04s 36.15r     # +10%

chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      19.02u 331.08s 90.79r
        gc chameneosredux       12.54u 0.00s 12.55r

Oct 19, 2010

# Another long-overdue update. Some of the code is new; parallel versions
# of some are added.  A few significant improvements.

fasta -n 25000000
        gcc -O2 fasta.c 4.92u 0.00s 4.93r
        gccgo -O2 fasta.go      3.31u 0.00s 3.34r  # new code
        gc fasta        3.68u 0.00s 3.69r  # 2.5X faster with no code
        gc_B fasta      3.68u 0.00s 3.69r  # 2.3X faster with no code

reverse-complement < output-of-fasta-25000000
        gcc -O2 reverse-complement.c    1.93u 0.81s 11.24r
        gccgo -O2 reverse-complement.go 1.58u 0.43s 2.04r  # first run with new code?
        gc reverse-complement   1.84u 0.34s 2.20r  # 10% faster
        gc_B reverse-complement 1.85u 0.32s 2.18r

nbody -n 50000000
        gcc -O2 nbody.c 21.35u 0.00s 21.36r
        gccgo -O2 nbody.go      21.62u 0.00s 21.66r  # 3.7X faster - why??
        gc nbody        29.78u 0.00s 29.79r
        gc_B nbody      29.72u 0.00s 29.72r

binary-tree 15 # too slow to use 20
        gcc -O2 binary-tree.c -lm       0.86u 0.00s 0.88r
        gccgo -O2 binary-tree.go        4.05u 0.02s 4.08r  # 28% faster
        gccgo -O2 binary-tree-freelist.go       0.34u 0.08s 0.34r
        gc binary-tree  5.94u 0.00s 5.95r  # 20% faster
        gc binary-tree-freelist 0.50u 0.01s 0.54r

fannkuch 12
        gcc -O2 fannkuch.c      60.45u 0.00s 60.45r
        gccgo -O2 fannkuch.go   64.64u 0.00s 64.64r
        gccgo -O2 fannkuch-parallel.go  115.63u 0.00s 31.58r
        gc fannkuch     126.52u 0.04s 126.68r
        gc fannkuch-parallel    238.82u 0.10s 65.93r  # GOMAXPROCS=4
        gc_B fannkuch   88.99u 0.00s 89.02r

regex-dna 100000
        gcc -O2 regex-dna.c -lpcre      0.89u 0.00s 0.89r
        gc regex-dna    8.99u 0.02s 9.03r
        gc regex-dna-parallel   8.94u 0.02s 3.68r  # GOMAXPROCS=4
        gc_B regex-dna  9.12u 0.00s 9.14r

spectral-norm 5500
        gcc -O2 spectral-norm.c -lm     11.55u 0.00s 11.57r
        gccgo -O2 spectral-norm.go      11.73u 0.00s 11.75r
        gc spectral-norm        23.74u 0.00s 23.79r
        gc_B spectral-norm      24.49u 0.02s 24.54r

k-nucleotide 1000000
        gcc -O2 k-nucleotide.c  11.44u 0.06s 11.50r
        gccgo -O2 k-nucleotide.go       8.65u 0.04s 8.71r
        gccgo -O2 k-nucleotide-parallel.go      8.75u 0.03s 2.97r # set GOMAXPROCS=4
        gc k-nucleotide 14.92u 0.05s 15.01r
        gc k-nucleotide-parallel        16.96u 0.06s 6.53r  # set GOMAXPROCS=4
        gc_B k-nucleotide       15.97u 0.03s 16.08r

mandelbrot 16000
        gcc -O2 mandelbrot.c    56.32u 0.00s 56.35r
        gccgo -O2 mandelbrot.go 55.62u 0.02s 55.77r
        gc mandelbrot   64.85u 0.01s 64.94r
        gc_B mandelbrot 65.02u 0.01s 65.14r

meteor 2100
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.10u 0.00s 0.11r
        gc meteor-contest       0.17u 0.00s 0.18r
        gc_B meteor-contest     0.16u 0.00s 0.16r

pidigits 10000
        gcc -O2 pidigits.c -lgmp        2.58u 0.00s 2.59r
        gccgo -O2 pidigits.go   14.06u 0.01s 14.09r # first run?
        gc pidigits     8.47u 0.05s 8.55r # 4.5X faster due to package big
        gc_B pidigits   8.33u 0.01s 8.36r # 4.5X faster due to package big

threadring 50000000
        gcc -O2 threadring.c    28.18u 153.19s 186.47r
        gccgo -O2 threadring.go 110.10u 516.48s 515.25r
        gc threadring   40.39u 0.00s 40.40r

chameneos 6000000
        gcc -O2 chameneosredux.c -lpthread      18.20u 301.55s 83.10r
        gccgo -O2 chameneosredux.go     52.22u 324.54s 201.21r
        gc chameneosredux       13.52u 0.00s 13.54r

Dec 14, 2010

# Improved regex code (same algorithm) gets ~30%.

regex-dna 100000
        gcc -O2 regex-dna.c -lpcre      0.77u 0.01s 0.78r
        gc regex-dna    6.80u 0.00s 6.81r
        gc regex-dna-parallel   6.82u 0.01s 2.75r
        gc_B regex-dna  6.69u 0.02s 6.70r

Feb 15, 2011

# Improved GC, still single-threaded but more efficient

fasta -n 25000000
        gcc -O2 fasta.c 3.40u 0.00s 3.40r
        gccgo -O2 fasta.go      3.51u 0.00s 3.50r
        gc fasta        3.66u 0.01s 3.66r
        gc_B fasta      3.66u 0.00s 3.66r

reverse-complement < output-of-fasta-25000000
        gcc -O2 reverse-complement.c    1.86u 1.29s 4.93r
        gccgo -O2 reverse-complement.go 2.18u 0.41s 2.60r
        gc reverse-complement   1.67u 0.48s 2.15r
        gc_B reverse-complement 1.71u 0.45s 2.15r

nbody -n 50000000
        gcc -O2 -lm nbody.c     21.64u 0.00s 21.64r
        gccgo -O2 nbody.go      21.46u 0.00s 21.45r
        gc nbody        29.07u 0.00s 29.06r
        gc_B nbody      31.61u 0.00s 31.61r

binary-tree 15 # too slow to use 20
        gcc -O2 binary-tree.c -lm       0.88u 0.00s 0.87r
        gccgo -O2 binary-tree.go        2.74u 0.07s 2.81r
        gccgo -O2 binary-tree-freelist.go       0.01u 0.00s 0.00r
        gc binary-tree  4.22u 0.02s 4.24r
        gc binary-tree-freelist 0.54u 0.02s 0.55r

fannkuch 12
        gcc -O2 fannkuch.c      57.64u 0.00s 57.64r
        gccgo -O2 fannkuch.go   65.79u 0.00s 65.82r
        gccgo -O2 fannkuch-parallel.go  160.91u 0.02s 43.90r
        gc fannkuch     126.36u 0.03s 126.53r
        gc fannkuch-parallel    175.23u 0.04s 45.49r
        gc_B fannkuch   89.23u 0.00s 89.24r

regex-dna 100000
        gcc -O2 regex-dna.c -lpcre      0.77u 0.01s 0.80r
        gccgo -O2 regex-dna.go  12.38u 0.10s 12.52r
        gccgo -O2 regex-dna-parallel.go 43.96u 4.64s 15.11r
        gc regex-dna    7.03u 0.01s 7.05r
        gc regex-dna-parallel   6.85u 0.05s 2.70r
        gc_B regex-dna  6.87u 0.02s 6.89r

spectral-norm 5500
        gcc -O2 spectral-norm.c -lm     12.29u 0.00s 12.28r
        gccgo -O2 spectral-norm.go      11.79u 0.00s 11.79r
        gc spectral-norm        24.00u 0.02s 24.05r
        gc_B spectral-norm      24.59u 0.01s 24.59r

k-nucleotide 1000000
        gcc -O2 k-nucleotide.c  9.75u 0.07s 9.82r
        gccgo -O2 k-nucleotide.go       8.92u 0.06s 8.98r
        gccgo -O2 k-nucleotide-parallel.go      8.40u 0.04s 2.76r
        gc k-nucleotide 17.01u 0.03s 17.04r
        gc k-nucleotide-parallel        16.51u 0.08s 6.21r
        gc_B k-nucleotide       16.94u 0.08s 17.02r

mandelbrot 16000
        gcc -O2 mandelbrot.c    54.60u 0.00s 54.66r
        gccgo -O2 mandelbrot.go 59.38u 0.00s 59.41r
        gc mandelbrot   64.93u 0.04s 65.08r
        gc_B mandelbrot 64.85u 0.03s 64.92r

meteor 2098
        gcc -O2 meteor-contest.c        0.10u 0.01s 0.10r
        gccgo -O2 meteor-contest.go     0.11u 0.00s 0.11r
        gc meteor-contest       0.18u 0.00s 0.17r
        gc_B meteor-contest     0.17u 0.00s 0.16r

pidigits 10000
        gcc -O2 pidigits.c -lgmp        2.24u 0.00s 2.23r
        gccgo -O2 pidigits.go   14.05u 0.00s 14.06r
        gc pidigits     6.34u 0.05s 6.38r
        gc_B pidigits   6.37u 0.02s 6.38r

threadring 50000000
        gcc -O2 threadring.c    30.50u 258.05s 325.72r
        gccgo -O2 threadring.go 92.87u 748.39s 728.46r
        gc threadring   38.03u 0.01s 38.04r

# Apr 15, 2011
# Move to new machine, Intel Xeon E5520@2.27GHz.
# (Was Opteron(tm) Processor 8214 HE)

fasta -n 25000000
OLD:
        gcc -O2 fasta.c 3.39u 0.04s 3.42r
        gccgo -O2 fasta.go      3.52u 0.00s 3.52r
        gc fasta        3.63u 0.04s 3.67r
        gc_B fasta      3.66u 0.00s 3.66r
NEW:
        gcc -O2 fasta.c 1.45u 0.02s 1.47r
        gccgo -O2 fasta.go      1.51u 0.01s 1.51r
        gc fasta        2.04u 0.00s 2.04r
        gc_B fasta      2.05u 0.00s 2.04r

reverse-complement < output-of-fasta-25000000
OLD:
        gcc -O2 reverse-complement.c    1.87u 1.51s 7.02r
        gccgo -O2 reverse-complement.go 1.56u 0.54s 3.37r
        gc reverse-complement   1.73u 0.36s 2.08r
        gc_B reverse-complement 1.75u 0.37s 2.12r
NEW:
        gcc -O2 reverse-complement.c    1.20u 0.47s 12.96r
        gccgo -O2 reverse-complement.go 0.88u 0.14s 1.01r
        gc reverse-complement   1.13u 0.17s 1.30r
        gc_B reverse-complement 1.11u 0.09s 1.20r

nbody -n 50000000
OLD:
        gcc -O2 -lm nbody.c     21.90u 0.00s 21.92r
        gccgo -O2 nbody.go      23.12u 0.03s 23.19r
        gc nbody        29.07u 0.00s 29.07r
        gc_B nbody      31.84u 0.00s 31.85r
NEW:
        gcc -O2 -lm nbody.c     13.01u 0.00s 13.03r
        gccgo -O2 nbody.go      13.35u 0.00s 13.37r
        gc nbody        21.78u 0.00s 21.82r
        gc_B nbody      21.72u 0.00s 21.76r

binary-tree 15 # too slow to use 20
OLD:
        gcc -O2 binary-tree.c -lm       0.83u 0.02s 0.84r
        gccgo -O2 binary-tree.go        2.61u 0.02s 2.62r
        gccgo -O2 binary-tree-freelist.go       0.32u 0.01s 0.32r
        gc binary-tree  3.93u 0.04s 3.97r
        gc binary-tree-freelist 0.47u 0.03s 0.50r
NEW:
        gcc -O2 binary-tree.c -lm       0.60u 0.00s 0.59r
        gccgo -O2 binary-tree.go        1.53u 0.00s 1.52r
        gccgo -O2 binary-tree-freelist.go       0.01u 0.00s 0.00r
        gc binary-tree  1.93u 0.02s 1.95r
        gc binary-tree-freelist 0.32u 0.01s 0.32r

fannkuch 12
OLD:
        gcc -O2 fannkuch.c      57.64u 0.00s 57.64r
        gccgo -O2 fannkuch.go   65.56u 0.01s 65.65r
        gccgo -O2 fannkuch-parallel.go  179.12u 0.00s 49.82r
        gc fannkuch     126.39u 0.00s 126.39r
        gc fannkuch-parallel    172.49u 0.02s 45.44r
        gc_B fannkuch   89.30u 0.00s 89.28r
NEW:
        gcc -O2 fannkuch.c      45.17u 0.00s 45.26r
        gccgo -O2 fannkuch.go   53.63u 0.00s 53.73r
        gccgo -O2 fannkuch-parallel.go  216.72u 0.00s 58.42r
        gc fannkuch     108.21u 0.00s 108.44r
        gc fannkuch-parallel    227.20u 0.00s 57.27r
        gc_B fannkuch   56.14u 0.00s 56.26r

regex-dna 100000
OLD:
        gcc -O2 regex-dna.c -lpcre      0.77u 0.01s 0.78r
        gccgo -O2 regex-dna.go  10.15u 0.02s 10.23r
        gccgo -O2 regex-dna-parallel.go 33.81u 3.22s 11.62r
        gc regex-dna    6.52u 0.04s 6.56r
        gc regex-dna-parallel   6.84u 0.03s 2.70r
        gc_B regex-dna  6.83u 0.01s 6.84r
NEW:
        gcc -O2 regex-dna.c -lpcre      0.47u 0.00s 0.47r
        gccgo -O2 regex-dna.go  6.00u 0.00s 6.00r
        gccgo -O2 regex-dna-parallel.go 44.54u 1.57s 6.51r
        gc regex-dna    5.41u 0.01s 5.42r
        gc regex-dna-parallel   5.62u 0.01s 2.20r
        gc_B regex-dna  5.50u 0.00s 5.50r

spectral-norm 5500
OLD:
        gcc -O2 spectral-norm.c -lm     12.29u 0.00s 12.28r
        gccgo -O2 spectral-norm.go      11.56u 0.00s 11.55r
        gc spectral-norm        23.98u 0.00s 24.00r
        gc_B spectral-norm      24.62u 0.00s 24.65r
NEW:
        gcc -O2 spectral-norm.c -lm     15.79u 0.00s 15.82r
        gccgo -O2 spectral-norm.go      15.32u 0.00s 15.35r
        gc spectral-norm        19.62u 0.01s 19.67r
        gc_B spectral-norm      19.62u 0.00s 19.66r

k-nucleotide 1000000
OLD:
        gcc -O2 k-nucleotide.c  9.82u 0.06s 9.87r
        gccgo -O2 k-nucleotide.go       8.30u 0.02s 8.32r
        gccgo -O2 k-nucleotide-parallel.go      8.84u 0.05s 3.02r
        gc k-nucleotide 15.38u 0.07s 15.44r
        gc k-nucleotide-parallel        16.40u 0.03s 5.93r
        gc_B k-nucleotide       15.19u 0.05s 15.23r
NEW:
        gcc -O2 k-nucleotide.c  4.88u 0.03s 4.92r
        gccgo -O2 k-nucleotide.go       5.94u 0.01s 5.96r
        gccgo -O2 k-nucleotide-parallel.go      6.44u 0.03s 1.47r
        gc k-nucleotide 9.61u 0.01s 9.63r
        gc k-nucleotide-parallel        9.70u 0.00s 3.39r
        gc_B k-nucleotide       9.19u 0.03s 9.23r

mandelbrot 16000
OLD:
        gcc -O2 mandelbrot.c    54.54u 0.00s 54.56r
        gccgo -O2 mandelbrot.go 59.63u 0.03s 59.67r
        gc mandelbrot   64.82u 0.00s 64.83r
        gc_B mandelbrot 64.84u 0.00s 64.91r
NEW:
        gcc -O2 mandelbrot.c    36.07u 0.01s 36.15r
        gccgo -O2 mandelbrot.go 43.57u 0.00s 43.66r
        gc mandelbrot   60.66u 0.00s 60.79r
        gc_B mandelbrot 60.90u 0.00s 61.03r

meteor 2098
OLD:
        gcc -O2 meteor-contest.c        0.11u 0.00s 0.10r
        gccgo -O2 meteor-contest.go     0.10u 0.01s 0.10r
        gc meteor-contest       0.18u 0.00s 0.17r
        gc_B meteor-contest     0.17u 0.00s 0.16r
NEW:
        gcc -O2 meteor-contest.c        0.10u 0.00s 0.09r
        gccgo -O2 meteor-contest.go     0.10u 0.00s 0.09r
        gc meteor-contest       0.14u 0.00s 0.14r
        gc_B meteor-contest     0.13u 0.00s 0.13r

pidigits 10000
OLD:
        gcc -O2 pidigits.c -lgmp        2.22u 0.00s 2.21r
        gccgo -O2 pidigits.go   13.39u 0.00s 13.40r
        gc pidigits     6.42u 0.04s 6.45r
        gc_B pidigits   6.45u 0.02s 6.47r
NEW:
        gcc -O2 pidigits.c -lgmp        2.27u 0.00s 2.29r
        gccgo -O2 pidigits.go   9.21u 0.00s 9.22r
        gc pidigits     3.60u 0.00s 3.60r
        gc_B pidigits   3.56u 0.02s 3.58r

threadring 50000000
OLD:
        gcc -O2 threadring.c -lpthread  34.51u 267.95s 336.12r
        gccgo -O2 threadring.go 103.51u 588.57s 627.16r
        gc threadring   54.68u 0.00s 54.73r
NEW:
        gcc -O2 threadring.c 32.00u 259.39s 369.74r
        gccgo -O2 threadring.go 133.06u 546.02s 595.33r
        gc threadring   16.75u 0.02s 16.80r

chameneos 6000000
OLD:
        gcc -O2 chameneosredux.c -lpthread      12.65u 31.02s 13.33r
        gccgo -O2 chameneosredux.go     47.04u 302.84s 252.29r
        gc chameneosredux       14.14u 0.00s 14.14r
NEW:
        gcc -O2 chameneosredux.c -lpthread      8.05u 63.43s 11.16r
        gccgo -O2 chameneosredux.go     82.95u 304.37s 207.64r
        gc chameneosredux       9.42u 0.00s 9.43r

# May 13, 2011
# after gc update to inline append when possible - 35% faster

regex-dna 100000
        gc regex-dna    3.94u 0.00s 3.95r
        gc regex-dna-parallel   4.15u 0.01s 1.63r
        gc_B regex-dna  4.01u 0.01s 4.02r

# Aug 4, 2011
# After various updates to locking code and some runtime changes.
# Slowdowns believed due to slower (but more correct) memmove.

fannkuch 12
        gccgo -O2 fannkuch.go   51.59u 0.00s 51.69r # -4%
        gccgo -O2 fannkuch-parallel.go  253.17u 0.00s 64.67r # -11%
        gc fannkuch     103.14u 0.00s 103.36r # -5%
        gc fannkuch-parallel    189.63u 0.00s 49.37r # +9%
        gc_B fannkuch   49.19u 0.00s 49.29r # -14%

regex-dna 100000
        gc regex-dna    3.78u 0.00s 3.78r # -43%
        gc regex-dna-parallel   3.84u 0.02s 1.48r # -49%
        gc_B regex-dna  3.62u 0.00s 3.63r # -52%

k-nucleotide 1000000
        gc k-nucleotide 12.23u 0.02s 12.27r # +27%
        gc k-nucleotide-parallel        12.76u 0.02s 4.37r # +29%
        gc_B k-nucleotide       12.18u 0.01s 12.21r # +33%

threadring 50000000
        gc threadring   17.49u 0.00s 17.53r # +4%

chameneos 6000000
        gc chameneosredux       7.61u 0.00s 7.63r # -24%

Aug 9, 2011
# After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.

fannkuch 12
        gc fannkuch-parallel    157.17u 0.00s 41.08r # -17%

k-nucleotide 1000000
        gc k-nucleotide 8.72u 0.03s 8.76r # -39%
        gc k-nucleotide-parallel        8.79u 0.01s 3.14r # -39%
        gc_B k-nucleotide       8.65u 0.03s 8.69r # -39%

pidigits 10000
        gc pidigits     3.71u 0.02s 3.73r # +4%
        gc_B pidigits   3.73u 0.00s 3.73r # +4%

threadring 50000000
        gc threadring   14.51u 0.00s 14.54r # -17%

chameneos 6000000
        gc chameneosredux       7.41u 0.00s 7.42r # -3%

Compare with Previous | Blame | View Log

powered by: WebSVN 2.1.0

© copyright 1999-2024 OpenCores.org, equivalent to Oliscience, all rights reserved. OpenCores®, registered trademark.