URL
https://opencores.org/ocsvn/openrisc/openrisc/trunk
Subversion Repositories openrisc
[/] [openrisc/] [trunk/] [gnu-dev/] [or1k-gcc/] [gcc/] [testsuite/] [go.test/] [test/] [bench/] [shootout/] [timing.log] - Rev 700
Compare with Previous | Blame | View Log
All tests on r45 or r70

Aug 3 2009

First version of fasta. Translation of fasta.c, fetched from
http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4

fasta -n 25000000
gcc -O2 fasta.c 5.98u 0.00s 6.01r
gccgo -O2 fasta.go 8.82u 0.02s 8.85r
6g fasta.go 13.50u 0.02s 13.53r
6g -B fasta.go 12.99u 0.02s 13.02r

Aug 4 2009
[added timing.sh]

# myrandom:
# hand-written optimization of integer division
# use int32->float conversion
fasta -n 25000000
# probably I/O library inefficiencies
gcc -O2 fasta.c 5.99u 0.00s 6.00r
gccgo -O2 fasta.go 8.82u 0.02s 8.85r
gc fasta 10.70u 0.00s 10.77r
gc_B fasta 10.09u 0.03s 10.12r

reverse-complement < output-of-fasta-25000000
# we don't know - memory cache behavior?
gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r
gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r
gc reverse-complement 6.55u 0.70s 7.26r
gc_B reverse-complement 6.32u 0.70s 7.10r

nbody 50000000
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
gcc -O2 nbody.c 21.61u 0.01s 24.80r
gccgo -O2 nbody.go 118.55u 0.02s 120.32r
gc nbody 100.84u 0.00s 100.85r
gc_B nbody 103.33u 0.00s 103.39r
[hacked Sqrt in assembler
gc nbody 31.97u 0.00s 32.01r]

binary-tree 15 # too slow to use 20
# memory allocation and garbage collection
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r
gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r
gc binary-tree 9.60u 0.01s 9.62r
gc binary-tree-freelist 0.48u 0.01s 0.50r

August 5, 2009

fannkuch 12
# bounds checking is half the difference
# rest might be registerization
gcc -O2 fannkuch.c 60.09u 0.00s 60.32r
gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r
gc fannkuch 124.59u 0.00s 124.67r
gc_B fannkuch 91.14u 0.00s 91.16r

regex-dna 100000
# regexp code is slow on trivial regexp
gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r
gc regex-dna 26.94u 0.18s 28.75r
gc_B regex-dna 26.51u 0.09s 26.75r

spectral-norm 5500
gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r
gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r
gc spectral-norm 50.23u 0.00s 50.36r
gc_B 
spectral-norm 49.69u 0.01s 49.83rgc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]August 6, 2009k-nucleotide 5000000# string maps are slower than glib string mapsgcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 k-nucleotide.c: 10.72u 0.01s 10.74rgccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78rgc k-nucleotide 16.08u 0.06s 16.50rgc_B k-nucleotide 17.32u 0.02s 17.37rmandelbrot 5500# floating point code generator should use more registersgcc -O2 mandelbrot.c 56.13u 0.02s 56.17rgccgo -O2 mandelbrot.go 57.49u 0.01s 57.51rgc mandelbrot 74.32u 0.00s 74.35rgc_B mandelbrot 74.28u 0.01s 74.31rmeteor 2100# we don't knowgcc -O2 meteor-contest.c 0.10u 0.00s 0.10rgccgo -O2 meteor-contest.go 0.12u 0.00s 0.14rgc meteor-contest 0.24u 0.00s 0.26rgc_B meteor-contest 0.23u 0.00s 0.24rpidigits 10000# bignum is slower than gmpgcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62rgc pidigits 77.69u 0.14s 78.18rgc_B pidigits 74.26u 0.18s 75.41rgc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignumAugust 7 2009# New gc does better division by powers of 2. Significant improvements:spectral-norm 5500# floating point code generator should use more registers; possibly inline evalAgcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50rgccgo -O2 spectral-norm.go 12.02u 0.00s 12.02rgc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% fastergc_B spectral-norm 23.71u 0.01s 23.72r # dittogc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. 
note: 4x faster (on r70, idle)

k-nucleotide 1000000
# string maps are slower than glib string maps
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r
gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r
gc k-nucleotide 15.97u 0.03s 16.04r
gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version

pidigits 10000
# bignum is slower than gmp
gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r
gc pidigits 71.24u 0.04s 71.28r # 8.5% faster
gc_B pidigits 71.25u 0.03s 71.29r # 4% faster

threadring 50000000
gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r
gccgo -O2 threadring.go 90.33u 459.95s 448.03r
gc threadring 33.11u 0.00s 33.14r
GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r
# change wait code to do <-make(chan int) instead of time.Sleep
gc threadring 28.41u 0.01s 29.35r
GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r

chameneos 6000000
gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r
gc chameneosredux 20.19u 0.01s 20.23r

Aug 10 2009

# new 6g with better fp registers, fast div and mod of integers
# complete set of timings listed. 
significant changes marked ***fasta -n 25000000# probably I/O library inefficienciesgcc -O2 fasta.c 5.96u 0.00s 5.97rgc fasta 10.59u 0.01s 10.61rgc_B fasta 9.92u 0.02s 9.95rreverse-complement < output-of-fasta-25000000# we don't know - memory cache behavior?gcc -O2 reverse-complement.c 1.96u 1.56s 16.23rgccgo -O2 reverse-complement.go 6.41u 0.62s 7.05rgc reverse-complement 6.46u 0.70s 7.17rgc_B reverse-complement 6.22u 0.72s 6.95rnbody 50000000# math.Sqrt needs to be in assembly; inlining is probably the other 50%gcc -O2 nbody.c 21.26u 0.01s 21.28rgccgo -O2 nbody.go 116.68u 0.07s 116.80rgc nbody 86.64u 0.01s 86.68r # -14%gc_B nbody 85.72u 0.02s 85.77r # *** -17%binary-tree 15 # too slow to use 20# memory allocation and garbage collectiongcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87rgccgo -O2 binary-tree.go 1.61u 0.47s 2.09rgccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01rgc binary-tree 9.11u 0.01s 9.13r # *** -5%gc binary-tree-freelist 0.47u 0.01s 0.48rfannkuch 12# bounds checking is half the difference# rest might be registerizationgcc -O2 fannkuch.c 59.92u 0.00s 59.94rgccgo -O2 fannkuch.go 65.54u 0.00s 65.58rgc fannkuch 123.98u 0.01s 124.04rgc_B fannkuch 90.75u 0.00s 90.78rregex-dna 100000# regexp code is slow on trivial regexpgcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92rgc regex-dna 27.25u 0.02s 27.28rgc_B regex-dna 29.51u 0.03s 29.55rspectral-norm 5500# possibly inline evalAgcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57rgccgo -O2 spectral-norm.go 12.07u 0.01s 12.08rgc spectral-norm 23.99u 0.00s 24.00rgc_B spectral-norm 23.73u 0.00s 23.75rk-nucleotide 1000000# string maps are slower than glib string mapsgcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69rgccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12rgc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that)gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5%mandelbrot 16000gcc -O2 mandelbrot.c 56.16u 0.00s 56.16rgccgo -O2 
mandelbrot.go 57.41u 0.01s 57.42rgc mandelbrot 64.05u 0.02s 64.08r # *** -14%gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14%meteor 2100# we don't knowgcc -O2 meteor-contest.c 0.10u 0.00s 0.10rgccgo -O2 meteor-contest.go 0.12u 0.00s 0.12rgc meteor-contest 0.18u 0.00s 0.20r # *** -25%gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24%pidigits 10000# bignum is slower than gmpgcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57rgc pidigits 71.82u 0.04s 71.89rgc_B pidigits 71.84u 0.08s 71.98rthreadring 50000000gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57rgccgo -O2 threadring.go 87.12u 460.04s 447.61rgc threadring 38.55u 0.00s 38.56r # *** +16%chameneos 6000000gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47rgc chameneosredux 21.72u 0.00s 21.73rAugust 10 2009# In-place versions for some bignum operations.pidigits 10000gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57rgc pidigits 55.22u 0.04s 55.29r # *** -23%gc_B pidigits 55.49u 0.02s 55.60r # *** -23%September 3 2009# New 6g inlines slices, has a few other tweaks.# Complete rerun. 
Significant changes marked.fasta -n 25000000# probably I/O library inefficienciesgcc -O2 fasta.c 5.96u 0.00s 5.96rgc fasta 10.63u 0.02s 10.66rgc_B fasta 9.92u 0.01s 9.94rreverse-complement < output-of-fasta-25000000# we don't know - memory cache behavior?gcc -O2 reverse-complement.c 1.92u 0.33s 2.93rgccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r # +5%gc reverse-complement 6.59u 0.70s 7.29r # +2%gc_B reverse-complement 5.57u 0.80s 6.37r # -10%nbody 50000000# math.Sqrt needs to be in assembly; inlining is probably the other 50%# also loop alignment appears to be criticalgcc -O2 nbody.c 21.28u 0.00s 21.28rgccgo -O2 nbody.go 119.21u 0.00s 119.22r # +2%gc nbody 109.72u 0.00s 109.78r # + 28% *****gc_B nbody 85.90u 0.00s 85.91rbinary-tree 15 # too slow to use 20# memory allocation and garbage collectiongcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87rgccgo -O2 binary-tree.go 1.88u 0.54s 2.42r # +17%gccgo -O2 binary-tree-freelist.go 0.01u 0.01s 0.02rgc binary-tree 8.94u 0.01s 8.96r # -2%gc binary-tree-freelist 0.47u 0.01s 0.48rfannkuch 12# bounds checking is half the difference# rest might be registerizationgcc -O2 fannkuch.c 60.12u 0.00s 60.12rgccgo -O2 fannkuch.go 92.62u 0.00s 92.66r # +41% ***gc fannkuch 123.90u 0.00s 123.92rgc_B fannkuch 89.71u 0.00s 89.74r # -1%regex-dna 100000# regexp code is slow on trivial regexpgcc -O2 regex-dna.c -lpcre 0.88u 0.00s 0.88rgc regex-dna 25.77u 0.01s 25.79r # -5%gc_B regex-dna 26.05u 0.02s 26.09r # -12% ***spectral-norm 5500# possibly inline evalAgcc -O2 spectral-norm.c -lm 11.51u 0.00s 11.51rgccgo -O2 spectral-norm.go 11.95u 0.00s 11.96rgc spectral-norm 24.23u 0.00s 24.23rgc_B spectral-norm 23.83u 0.00s 23.84rk-nucleotide 1000000# string maps are slower than glib string mapsgcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.68u 0.04s 10.72rgccgo -O2 k-nucleotide.go 23.03u 0.88s 23.92rgc k-nucleotide 15.79u 0.05s 15.85r # -5% (but this one seems to vary by more than that)gc_B k-nucleotide 17.88u 
0.05s 17.95r # +8% (ditto)mandelbrot 16000gcc -O2 mandelbrot.c 56.17u 0.02s 56.20rgccgo -O2 mandelbrot.go 56.74u 0.02s 56.79r # -1%gc mandelbrot 63.31u 0.01s 63.35r # -1%gc_B mandelbrot 63.29u 0.00s 63.31r # -1%meteor 2100# we don't knowgcc -O2 meteor-contest.c 0.10u 0.00s 0.10rgccgo -O2 meteor-contest.go 0.11u 0.00s 0.12rgc meteor-contest 0.18u 0.00s 0.19rgc_B meteor-contest 0.17u 0.00s 0.18rpidigits 10000# bignum is slower than gmpgcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57rgc pidigits 55.87u 0.03s 55.91rgc_B pidigits 55.93u 0.03s 55.99r# these tests are compared using real time, since they run multiple processors# accuracy probably lowthreadring 50000000gcc -O2 threadring.c -lpthread 26.31u 164.69s 199.92r # -2%gccgo -O2 threadring.go 87.90u 487.26s 472.81r # +6%gc threadring 28.89u 0.00s 28.90r # -25% ***chameneos 6000000gcc -O2 chameneosredux.c -lpthread 16.41u 296.91s 81.17r # -8%gc chameneosredux 19.97u 0.00s 19.97r # -8%Sep 22, 2009# 6g inlines sliceslice in most cases.fasta -n 25000000# probably I/O library inefficienciesgc fasta 10.24u 0.00s 10.25r # -4%gc_B fasta 9.68u 0.01s 9.69r # -3%reverse-complement < output-of-fasta-25000000# we don't know - memory cache behavior?gc reverse-complement 6.67u 0.69s 7.37r # +1%gc_B reverse-complement 6.00u 0.64s 6.65r # +7%nbody -n 50000000# math.Sqrt needs to be in assembly; inlining is probably the other 50%# also loop alignment appears to be criticalgc nbody 86.27u 0.00s 86.29r # -21%gc_B nbody 104.52u 0.00s 104.54r # +22%fannkuch 12# bounds checking is half the difference# rest might be registerizationgc fannkuch 128.36u 0.00s 128.37r # +4%gc_B fannkuch 89.32u 0.00s 89.34rregex-dna 100000# regexp code is slow on trivial regexpgc regex-dna 24.82u 0.01s 24.86r # -4%gc_B regex-dna 24.55u 0.01s 24.57r # -6%spectral-norm 5500# possibly inline evalAgc spectral-norm 24.05u 0.00s 24.07r # -1%gc_B spectral-norm 23.60u 0.00s 23.65r # -1%k-nucleotide 1000000# string maps are slower than glib string mapsgc k-nucleotide 17.84u 
0.04s 17.89r # +13% but mysterious variation continuesgc_B k-nucleotide 15.56u 0.08s 15.65r # -13% (ditto)mandelbrot 16000gc mandelbrot 64.08u 0.01s 64.11r # +1%gc_B mandelbrot 64.04u 0.00s 64.05r # +1%pidigits 10000# bignum is slower than gmpgc pidigits 58.68u 0.02s 58.72r # +5%gc_B pidigits 58.86u 0.05s 58.99r # +5%# these tests are compared using real time, since they run multiple processors# accuracy probably lowthreadring 50000000gc threadring 32.70u 0.02s 32.77r # +13%chameneos 6000000gc chameneosredux 26.62u 0.00s 26.63r # +13%Sep 24, 2009# Sqrt now in assembler for 6g.nbody -n 50000000# remember, at least for 6g, alignment of loops may be importantgcc -O2 nbody.c 21.24u 0.00s 21.25rgccgo -O2 nbody.go 121.03u 0.00s 121.04rgc nbody 30.26u 0.00s 30.27r # -65% ***gc_B nbody 30.20u 0.02s 30.22r # -72% ***Nov 13 2009# fix bug in regexp; take performance hit. good regexps will come in time.regex-dna 100000gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.94rgc regex-dna 29.78u 0.03s 29.83rgc_B regex-dna 32.63u 0.03s 32.74rNov 24 2009# Roger Peppe's rewrite of the benchmarkchameneos 6000000gcc -O2 chameneosredux.c -lpthread 18.00u 303.29s 83.64rgc chameneosredux 12.10u 0.00s 12.10r # 2.22X fasterJan 6, 2010# Long-overdue update. All numbers included in this complete run.# Some programs (e.g. 
reverse-complement) rewritten for speed.# Regular expressions much faster in common cases (although still far behind PCRE)# Bignum stuff improved# Better (but sometimes slower) locking in channels.fasta -n 25000000gcc -O2 fasta.c 5.99u 0.01s 6.00rgc fasta 9.11u 0.00s 9.12r # -11%gc_B fasta 8.60u 0.00s 8.62r # +12% ??reverse-complement < output-of-fasta-25000000gcc -O2 reverse-complement.c 2.00u 0.80s 9.54r# gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r # 33% fastergc reverse-complement 2.01u 0.38s 2.40r # 3.3X fastergc_B reverse-complement 1.88u 0.36s 2.24r # 3.2X fasterGOGC=offgc reverse-complement 2.01u 0.35s 2.37rgc_B reverse-complement 1.86u 0.32s 2.19rnbody -n 50000000gcc -O2 nbody.c 21.28u 0.00s 21.31rgccgo -O2 nbody.go 80.02u 0.00s 80.05r # 33% fastergc nbody 30.13u 0.00s 30.13rgc_B nbody 29.89u 0.01s 29.91rbinary-tree 15 # too slow to use 20gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87rgccgo -O2 binary-tree.go 4.82u 0.41s 5.24r # 2.5X slowergc binary-tree 7.23u 0.01s 7.25r # # -19%gc binary-tree-freelist 0.43u 0.00s 0.44r # -9%fannkuch 12gcc -O2 fannkuch.c 60.17u 0.00s 60.17rgccgo -O2 fannkuch.go 78.47u 0.01s 78.49rgc fannkuch 128.86u 0.00s 128.96rgc_B fannkuch 90.17u 0.00s 90.21rregex-dna 100000gcc -O2 regex-dna.c -lpcre 0.90u 0.00s 0.92rgc regex-dna 9.48u 0.01s 9.50r # 3.1X fastergc_B regex-dna 9.08u 0.00s 9.10r # 3.6X fasterspectral-norm 5500gcc -O2 spectral-norm.c -lm 11.48u 0.00s 11.48rgccgo -O2 spectral-norm.go 11.68u 0.00s 11.70rgc spectral-norm 23.98u 0.00s 23.99rgc_B spectral-norm 23.68u 0.00s 23.69rk-nucleotide 1000000gcc -O2 k-nucleotide.c 10.85u 0.04s 10.90rgccgo -O2 k-nucleotide.go 25.26u 0.87s 26.14rgc k-nucleotide 15.28u 0.06s 15.37r # restored; mysterious variation continuesgc_B k-nucleotide 15.97u 0.03s 16.00rmandelbrot 16000gcc -O2 mandelbrot.c 56.12u 0.01s 56.15rgccgo -O2 mandelbrot.go 56.86u 0.01s 56.89rgc mandelbrot 66.05u 0.00s 66.07r # -3%gc_B mandelbrot 66.06u 0.00s 66.07r # -3%meteor 2100gcc -O2 meteor-contest.c 0.10u 0.00s 
0.10rgccgo -O2 meteor-contest.go 0.12u 0.00s 0.12rgc meteor-contest 0.17u 0.00s 0.17rgc_B meteor-contest 0.15u 0.00s 0.16rpidigits 10000gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.59rgc pidigits 38.27u 0.02s 38.30r # 1.5X fastergc_B pidigits 38.27u 0.02s 38.31r # 1.5X fasterthreadring 50000000gcc -O2 threadring.c 37.11u 170.59s 212.75rgccgo -O2 threadring.go 89.67u 447.56s 442.55r # -6.5%gc threadring 36.08u 0.04s 36.15r # +10%chameneos 6000000gcc -O2 chameneosredux.c -lpthread 19.02u 331.08s 90.79rgc chameneosredux 12.54u 0.00s 12.55rOct 19, 2010# Another long-overdue update. Some of the code is new; parallel versions# of some are added. A few significant improvements.fasta -n 25000000gcc -O2 fasta.c 4.92u 0.00s 4.93rgccgo -O2 fasta.go 3.31u 0.00s 3.34r # new codegc fasta 3.68u 0.00s 3.69r # 2.5X faster with no codegc_B fasta 3.68u 0.00s 3.69r # 2.3X faster with no codereverse-complement < output-of-fasta-25000000gcc -O2 reverse-complement.c 1.93u 0.81s 11.24rgccgo -O2 reverse-complement.go 1.58u 0.43s 2.04r # first run with new code?gc reverse-complement 1.84u 0.34s 2.20r # 10% fastergc_B reverse-complement 1.85u 0.32s 2.18rnbody -n 50000000gcc -O2 nbody.c 21.35u 0.00s 21.36rgccgo -O2 nbody.go 21.62u 0.00s 21.66r # 3.7X faster - why??gc nbody 29.78u 0.00s 29.79rgc_B nbody 29.72u 0.00s 29.72rbinary-tree 15 # too slow to use 20gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.88rgccgo -O2 binary-tree.go 4.05u 0.02s 4.08r # 28% fastergccgo -O2 binary-tree-freelist 0.34u 0.08s 0.34rgc binary-tree 5.94u 0.00s 5.95r # 20% fastergc binary-tree-freelist 0.50u 0.01s 0.54rfannkuch 12gcc -O2 fannkuch.c 60.45u 0.00s 60.45rgccgo -O2 fannkuch.go 64.64u 0.00s 64.64rgccgo -O2 fannkuch-parallel.go 115.63u 0.00s 31.58rgc fannkuch 126.52u 0.04s 126.68rgc fannkuch-parallel 238.82u 0.10s 65.93r # GOMAXPROCS=4gc_B fannkuch 88.99u 0.00s 89.02rregex-dna 100000gcc -O2 regex-dna.c -lpcre 0.89u 0.00s 0.89rgc regex-dna 8.99u 0.02s 9.03rgc regex-dna-parallel 8.94u 0.02s 3.68r # GOMAXPROCS=4gc_B regex-dna 
9.12u 0.00s 9.14rspectral-norm 5500gcc -O2 spectral-norm.c -lm 11.55u 0.00s 11.57rgccgo -O2 spectral-norm.go 11.73u 0.00s 11.75rgc spectral-norm 23.74u 0.00s 23.79rgc_B spectral-norm 24.49u 0.02s 24.54rk-nucleotide 1000000gcc -O2 k-nucleotide.c 11.44u 0.06s 11.50rgccgo -O2 k-nucleotide.go 8.65u 0.04s 8.71rgccgo -O2 k-nucleotide-parallel.go 8.75u 0.03s 2.97r # set GOMAXPROCS=4gc k-nucleotide 14.92u 0.05s 15.01rgc k-nucleotide-parallel 16.96u 0.06s 6.53r # set GOMAXPROCS=4gc_B k-nucleotide 15.97u 0.03s 16.08rmandelbrot 16000gcc -O2 mandelbrot.c 56.32u 0.00s 56.35rgccgo -O2 mandelbrot.go 55.62u 0.02s 55.77rgc mandelbrot 64.85u 0.01s 64.94rgc_B mandelbrot 65.02u 0.01s 65.14rmeteor 2100gcc -O2 meteor-contest.c 0.10u 0.00s 0.10rgccgo -O2 meteor-contest.go 0.10u 0.00s 0.11rgc meteor-contest 0.17u 0.00s 0.18rgc_B meteor-contest 0.16u 0.00s 0.16rpidigits 10000gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.59rgccgo -O2 pidigits.go 14.06u 0.01s 14.09r # first run?gc pidigits 8.47u 0.05s 8.55r # 4.5X faster due to package biggc_B pidigits 8.33u 0.01s 8.36r # 4.5X faster due to package bigthreadring 50000000gcc -O2 threadring.c 28.18u 153.19s 186.47rgccgo -O2 threadring.go 110.10u 516.48s 515.25rgc threadring 40.39u 0.00s 40.40rchameneos 6000000gcc -O2 chameneosredux.c -lpthread 18.20u 301.55s 83.10rgccgo -O2 chameneosredux.go 52.22u 324.54s 201.21rgc chameneosredux 13.52u 0.00s 13.54rDec 14, 2010# Improved regex code (same algorithm) gets ~30%.regex-dna 100000gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78rgc regex-dna 6.80u 0.00s 6.81rgc regex-dna-parallel 6.82u 0.01s 2.75rgc_B regex-dna 6.69u 0.02s 6.70rFeb 15, 2011# Improved GC, still single-threaded but more efficientfasta -n 25000000gcc -O2 fasta.c 3.40u 0.00s 3.40rgccgo -O2 fasta.go 3.51u 0.00s 3.50rgc fasta 3.66u 0.01s 3.66rgc_B fasta 3.66u 0.00s 3.66rreverse-complement < output-of-fasta-25000000gcc -O2 reverse-complement.c 1.86u 1.29s 4.93rgccgo -O2 reverse-complement.go 2.18u 0.41s 2.60rgc reverse-complement 1.67u 0.48s 2.15rgc_B 
reverse-complement 1.71u 0.45s 2.15rnbody -n 50000000gcc -O2 -lm nbody.c 21.64u 0.00s 21.64rgccgo -O2 nbody.go 21.46u 0.00s 21.45rgc nbody 29.07u 0.00s 29.06rgc_B nbody 31.61u 0.00s 31.61rbinary-tree 15 # too slow to use 20gcc -O2 binary-tree.c -lm 0.88u 0.00s 0.87rgccgo -O2 binary-tree.go 2.74u 0.07s 2.81rgccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00rgc binary-tree 4.22u 0.02s 4.24rgc binary-tree-freelist 0.54u 0.02s 0.55rfannkuch 12gcc -O2 fannkuch.c 57.64u 0.00s 57.64rgccgo -O2 fannkuch.go 65.79u 0.00s 65.82rgccgo -O2 fannkuch-parallel.go 160.91u 0.02s 43.90rgc fannkuch 126.36u 0.03s 126.53rgc fannkuch-parallel 175.23u 0.04s 45.49rgc_B fannkuch 89.23u 0.00s 89.24rregex-dna 100000gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.80rgccgo -O2 regex-dna.go 12.38u 0.10s 12.52rgccgo -O2 regex-dna-parallel.go 43.96u 4.64s 15.11rgc regex-dna 7.03u 0.01s 7.05rgc regex-dna-parallel 6.85u 0.05s 2.70rgc_B regex-dna 6.87u 0.02s 6.89rspectral-norm 5500gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28rgccgo -O2 spectral-norm.go 11.79u 0.00s 11.79rgc spectral-norm 24.00u 0.02s 24.05rgc_B spectral-norm 24.59u 0.01s 24.59rk-nucleotide 1000000gcc -O2 k-nucleotide.c 9.75u 0.07s 9.82rgccgo -O2 k-nucleotide.go 8.92u 0.06s 8.98rgccgo -O2 k-nucleotide-parallel.go 8.40u 0.04s 2.76rgc k-nucleotide 17.01u 0.03s 17.04rgc k-nucleotide-parallel 16.51u 0.08s 6.21rgc_B k-nucleotide 16.94u 0.08s 17.02rmandelbrot 16000gcc -O2 mandelbrot.c 54.60u 0.00s 54.66rgccgo -O2 mandelbrot.go 59.38u 0.00s 59.41rgc mandelbrot 64.93u 0.04s 65.08rgc_B mandelbrot 64.85u 0.03s 64.92rmeteor 2098gcc -O2 meteor-contest.c 0.10u 0.01s 0.10rgccgo -O2 meteor-contest.go 0.11u 0.00s 0.11rgc meteor-contest 0.18u 0.00s 0.17rgc_B meteor-contest 0.17u 0.00s 0.16rpidigits 10000gcc -O2 pidigits.c -lgmp 2.24u 0.00s 2.23rgccgo -O2 pidigits.go 14.05u 0.00s 14.06rgc pidigits 6.34u 0.05s 6.38rgc_B pidigits 6.37u 0.02s 6.38rthreadring 50000000gcc -O2 threadring.c 30.50u 258.05s 325.72rgccgo -O2 threadring.go 92.87u 748.39s 728.46rgc 
threadring 38.03u 0.01s 38.04r# Apr 15, 2011# Move to new machine, Intel Xeon E5520@2.27GHz.# (Was Opteron(tm) Processor 8214 HE)fasta -n 25000000OLD:gcc -O2 fasta.c 3.39u 0.04s 3.42rgccgo -O2 fasta.go 3.52u 0.00s 3.52rgc fasta 3.63u 0.04s 3.67rgc_B fasta 3.66u 0.00s 3.66rNEW:gcc -O2 fasta.c 1.45u 0.02s 1.47rgccgo -O2 fasta.go 1.51u 0.01s 1.51rgc fasta 2.04u 0.00s 2.04rgc_B fasta 2.05u 0.00s 2.04rreverse-complement < output-of-fasta-25000000OLD:gcc -O2 reverse-complement.c 1.87u 1.51s 7.02rgccgo -O2 reverse-complement.go 1.56u 0.54s 3.37rgc reverse-complement 1.73u 0.36s 2.08rgc_B reverse-complement 1.75u 0.37s 2.12rNEW:gcc -O2 reverse-complement.c 1.20u 0.47s 12.96rgccgo -O2 reverse-complement.go 0.88u 0.14s 1.01rgc reverse-complement 1.13u 0.17s 1.30rgc_B reverse-complement 1.11u 0.09s 1.20rnbody -n 50000000OLD:gcc -O2 -lm nbody.c 21.90u 0.00s 21.92rgccgo -O2 nbody.go 23.12u 0.03s 23.19rgc nbody 29.07u 0.00s 29.07rgc_B nbody 31.84u 0.00s 31.85rNEW:gcc -O2 -lm nbody.c 13.01u 0.00s 13.03rgccgo -O2 nbody.go 13.35u 0.00s 13.37rgc nbody 21.78u 0.00s 21.82rgc_B nbody 21.72u 0.00s 21.76rbinary-tree 15 # too slow to use 20OLD:gcc -O2 binary-tree.c -lm 0.83u 0.02s 0.84rgccgo -O2 binary-tree.go 2.61u 0.02s 2.62rgccgo -O2 binary-tree-freelist.go 0.32u 0.01s 0.32rgc binary-tree 3.93u 0.04s 3.97rgc binary-tree-freelist 0.47u 0.03s 0.50rNEW:gcc -O2 binary-tree.c -lm 0.60u 0.00s 0.59rgccgo -O2 binary-tree.go 1.53u 0.00s 1.52rgccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00rgc binary-tree 1.93u 0.02s 1.95rgc binary-tree-freelist 0.32u 0.01s 0.32rfannkuch 12OLD:gcc -O2 fannkuch.c 57.64u 0.00s 57.64rgccgo -O2 fannkuch.go 65.56u 0.01s 65.65rgccgo -O2 fannkuch-parallel.go 179.12u 0.00s 49.82rgc fannkuch 126.39u 0.00s 126.39rgc fannkuch-parallel 172.49u 0.02s 45.44rgc_B fannkuch 89.30u 0.00s 89.28rNEW:gcc -O2 fannkuch.c 45.17u 0.00s 45.26rgccgo -O2 fannkuch.go 53.63u 0.00s 53.73rgccgo -O2 fannkuch-parallel.go 216.72u 0.00s 58.42rgc fannkuch 108.21u 0.00s 108.44rgc fannkuch-parallel 
227.20u 0.00s 57.27r
gc_B fannkuch 56.14u 0.00s 56.26r

regex-dna 100000
OLD:
gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r
gccgo -O2 regex-dna.go 10.15u 0.02s 10.23r
gccgo -O2 regex-dna-parallel.go 33.81u 3.22s 11.62r
gc regex-dna 6.52u 0.04s 6.56r
gc regex-dna-parallel 6.84u 0.03s 2.70r
gc_B regex-dna 6.83u 0.01s 6.84r
NEW:
gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r
gccgo -O2 regex-dna.go 6.00u 0.00s 6.00r
gccgo -O2 regex-dna-parallel.go 44.54u 1.57s 6.51r
gc regex-dna 5.41u 0.01s 5.42r
gc regex-dna-parallel 5.62u 0.01s 2.20r
gc_B regex-dna 5.50u 0.00s 5.50r

spectral-norm 5500
OLD:
gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r
gccgo -O2 spectral-norm.go 11.56u 0.00s 11.55r
gc spectral-norm 23.98u 0.00s 24.00r
gc_B spectral-norm 24.62u 0.00s 24.65r
NEW:
gcc -O2 spectral-norm.c -lm 15.79u 0.00s 15.82r
gccgo -O2 spectral-norm.go 15.32u 0.00s 15.35r
gc spectral-norm 19.62u 0.01s 19.67r
gc_B spectral-norm 19.62u 0.00s 19.66r

k-nucleotide 1000000
OLD:
gcc -O2 k-nucleotide.c 9.82u 0.06s 9.87r
gccgo -O2 k-nucleotide.go 8.30u 0.02s 8.32r
gccgo -O2 k-nucleotide-parallel.go 8.84u 0.05s 3.02r
gc k-nucleotide 15.38u 0.07s 15.44r
gc k-nucleotide-parallel 16.40u 0.03s 5.93r
gc_B k-nucleotide 15.19u 0.05s 15.23r
NEW:
gcc -O2 k-nucleotide.c 4.88u 0.03s 4.92r
gccgo -O2 k-nucleotide.go 5.94u 0.01s 5.96r
gccgo -O2 k-nucleotide-parallel.go 6.44u 0.03s 1.47r
gc k-nucleotide 9.61u 0.01s 9.63r
gc k-nucleotide-parallel 9.70u 0.00s 3.39r
gc_B k-nucleotide 9.19u 0.03s 9.23r

mandelbrot 16000
OLD:
gcc -O2 mandelbrot.c 54.54u 0.00s 54.56r
gccgo -O2 mandelbrot.go 59.63u 0.03s 59.67r
gc mandelbrot 64.82u 0.00s 64.83r
gc_B mandelbrot 64.84u 0.00s 64.91r
NEW:
gcc -O2 mandelbrot.c 36.07u 0.01s 36.15r
gccgo -O2 mandelbrot.go 43.57u 0.00s 43.66r
gc mandelbrot 60.66u 0.00s 60.79r
gc_B mandelbrot 60.90u 0.00s 61.03r

meteor 2098
OLD:
gcc -O2 meteor-contest.c 0.11u 0.00s 0.10r
gccgo -O2 meteor-contest.go 0.10u 0.01s 0.10r
gc meteor-contest 0.18u 0.00s 0.17r
gc_B meteor-contest 0.17u 0.00s 0.16r
NEW:
gcc -O2 meteor-contest.c 0.10u 0.00s 0.09r
gccgo -O2 
meteor-contest.go 0.10u 0.00s 0.09rgc meteor-contest 0.14u 0.00s 0.14rgc_B meteor-contest 0.13u 0.00s 0.13rpidigits 10000OLD:gcc -O2 pidigits.c -lgmp 2.22u 0.00s 2.21rgccgo -O2 pidigits.go 13.39u 0.00s 13.40rgc pidigits 6.42u 0.04s 6.45rgc_B pidigits 6.45u 0.02s 6.47rNEW:gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.29rgccgo -O2 pidigits.go 9.21u 0.00s 9.22rgc pidigits 3.60u 0.00s 3.60rgc_B pidigits 3.56u 0.02s 3.58rthreadring 50000000OLD:gcc -O2 threadring.c -lpthread 34.51u 267.95s 336.12rgccgo -O2 threadring.go 103.51u 588.57s 627.16rgc threadring 54.68u 0.00s 54.73rNEW:gcc -O2 threadring.c 32.00u 259.39s 369.74rgccgo -O2 threadring.go 133.06u 546.02s 595.33rgc threadring 16.75u 0.02s 16.80rchameneos 6000000OLD:gcc -O2 chameneosredux.c -lpthread 12.65u 31.02s 13.33rgccgo -O2 chameneosredux.go 47.04u 302.84s 252.29rgc chameneosredux 14.14u 0.00s 14.14rNEW:gcc -O2 chameneosredux.c -lpthread 8.05u 63.43s 11.16rgccgo -O2 chameneosredux.go 82.95u 304.37s 207.64rgc chameneosredux 9.42u 0.00s 9.43r# May 13, 2011# after gc update to inline append when possible - 35% fasterregex-dna 100000gc regex-dna 3.94u 0.00s 3.95rgc regex-dna-parallel 4.15u 0.01s 1.63rgc_B regex-dna 4.01u 0.01s 4.02r# Aug 4, 2011# After various updates to locking code and some runtime changes.# Slowdowns believed due to slower (but more correct) memmove.fannkuch 12gccgo -O2 fannkuch.go 51.59u 0.00s 51.69r # -4%gccgo -O2 fannkuch-parallel.go 253.17u 0.00s 64.67r # -11%gc fannkuch 103.14u 0.00s 103.36r # -5%gc fannkuch-parallel 189.63u 0.00s 49.37r # +9%gc_B fannkuch 49.19u 0.00s 49.29r # -14%regex-dna 100000gc regex-dna 3.78u 0.00s 3.78r # -43%gc regex-dna-parallel 3.84u 0.02s 1.48r # -49%gc_B regex-dna 3.62u 0.00s 3.63r # -52%k-nucleotide 1000000gc k-nucleotide 12.23u 0.02s 12.27r # +27%gc k-nucleotide-parallel 12.76u 0.02s 4.37r # +29%gc_B k-nucleotide 12.18u 0.01s 12.21r # +33%threadring 50000000gc threadring 17.49u 0.00s 17.53r # +4%chameneos 6000000gc chameneosredux 7.61u 0.00s 7.63r # -24%Aug 9, 2011# 
After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.

fannkuch 12
gc fannkuch-parallel 157.17u 0.00s 41.08r # -17%

k-nucleotide 1000000
gc k-nucleotide 8.72u 0.03s 8.76r # -39%
gc k-nucleotide-parallel 8.79u 0.01s 3.14r # -39%
gc_B k-nucleotide 8.65u 0.03s 8.69r # -39%

pidigits 10000
gc pidigits 3.71u 0.02s 3.73r # +4%
gc_B pidigits 3.73u 0.00s 3.73r # +4%

threadring 50000000
gc threadring 14.51u 0.00s 14.54r # -17%

chameneos 6000000
gc chameneosredux 7.41u 0.00s 7.42r # -3%
