1 |
700 |
jeremybenn |
All tests on r45 or r70
|
2 |
|
|
|
3 |
|
|
Aug 3 2009
|
4 |
|
|
|
5 |
|
|
First version of fasta. Translation of fasta.c, fetched from
|
6 |
|
|
http://shootout.alioth.debian.org/u32q/benchmark.php?test=fasta&lang=gpp&id=4
|
7 |
|
|
|
8 |
|
|
fasta -n 25000000
|
9 |
|
|
gcc -O2 fasta.c 5.98u 0.00s 6.01r
|
10 |
|
|
gccgo -O2 fasta.go 8.82u 0.02s 8.85r
|
11 |
|
|
6g fasta.go 13.50u 0.02s 13.53r
|
12 |
|
|
6g -B fasta.go 12.99u 0.02s 13.02r
|
13 |
|
|
|
14 |
|
|
Aug 4 2009
|
15 |
|
|
[added timing.sh]
|
16 |
|
|
|
17 |
|
|
# myrandom:
|
18 |
|
|
# hand-written optimization of integer division
|
19 |
|
|
# use int32->float conversion
|
20 |
|
|
fasta -n 25000000
|
21 |
|
|
# probably I/O library inefficiencies
|
22 |
|
|
gcc -O2 fasta.c 5.99u 0.00s 6.00r
|
23 |
|
|
gccgo -O2 fasta.go 8.82u 0.02s 8.85r
|
24 |
|
|
gc fasta 10.70u 0.00s 10.77r
|
25 |
|
|
gc_B fasta 10.09u 0.03s 10.12r
|
26 |
|
|
|
27 |
|
|
reverse-complement < output-of-fasta-25000000
|
28 |
|
|
# we don't know - memory cache behavior?
|
29 |
|
|
gcc -O2 reverse-complement.c 2.04u 0.94s 10.54r
|
30 |
|
|
gccgo -O2 reverse-complement.go 6.54u 0.63s 7.17r
|
31 |
|
|
gc reverse-complement 6.55u 0.70s 7.26r
|
32 |
|
|
gc_B reverse-complement 6.32u 0.70s 7.10r
|
33 |
|
|
|
34 |
|
|
nbody 50000000
|
35 |
|
|
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
|
36 |
|
|
gcc -O2 nbody.c 21.61u 0.01s 24.80r
|
37 |
|
|
gccgo -O2 nbody.go 118.55u 0.02s 120.32r
|
38 |
|
|
gc nbody 100.84u 0.00s 100.85r
|
39 |
|
|
gc_B nbody 103.33u 0.00s 103.39r
|
40 |
|
|
[
|
41 |
|
|
hacked Sqrt in assembler
|
42 |
|
|
gc nbody 31.97u 0.00s 32.01r
|
43 |
|
|
]
|
44 |
|
|
|
45 |
|
|
binary-tree 15 # too slow to use 20
|
46 |
|
|
# memory allocation and garbage collection
|
47 |
|
|
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
|
48 |
|
|
gccgo -O2 binary-tree.go 1.69u 0.46s 2.15r
|
49 |
|
|
gccgo -O2 binary-tree-freelist.go 8.48u 0.00s 8.48r
|
50 |
|
|
gc binary-tree 9.60u 0.01s 9.62r
|
51 |
|
|
gc binary-tree-freelist 0.48u 0.01s 0.50r
|
52 |
|
|
|
53 |
|
|
August 5, 2009
|
54 |
|
|
|
55 |
|
|
fannkuch 12
|
56 |
|
|
# bounds checking is half the difference
|
57 |
|
|
# rest might be registerization
|
58 |
|
|
gcc -O2 fannkuch.c 60.09u 0.01s 60.32r
|
59 |
|
|
gccgo -O2 fannkuch.go 64.89u 0.00s 64.92r
|
60 |
|
|
gc fannkuch 124.59u 0.00s 124.67r
|
61 |
|
|
gc_B fannkuch 91.14u 0.00s 91.16r
|
62 |
|
|
|
63 |
|
|
regex-dna 100000
|
64 |
|
|
# regexp code is slow on trivial regexp
|
65 |
|
|
gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.99r
|
66 |
|
|
gc regexp-dna 26.94u 0.18s 28.75r
|
67 |
|
|
gc_B regexp-dna 26.51u 0.09s 26.75r
|
68 |
|
|
|
69 |
|
|
spectral-norm 5500
|
70 |
|
|
gcc -O2 spectral-norm.c -lm 11.54u 0.00s 11.55r
|
71 |
|
|
gccgo -O2 spectral-norm.go 12.20u 0.00s 12.23r
|
72 |
|
|
gc spectral-norm 50.23u 0.00s 50.36r
|
73 |
|
|
gc_B spectral-norm 49.69u 0.01s 49.83r
|
74 |
|
|
gc spectral-norm-parallel 24.47u 0.03s 11.05r # has shift >>1 not div /2
|
75 |
|
|
[using >>1 instead of /2 : gc gives 24.33u 0.00s 24.33r]
|
76 |
|
|
|
77 |
|
|
August 6, 2009
|
78 |
|
|
|
79 |
|
|
k-nucleotide 5000000
|
80 |
|
|
# string maps are slower than glib string maps
|
81 |
|
|
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.72u 0.01s 10.74r
|
82 |
|
|
gccgo -O2 k-nucleotide.go 21.64u 0.83s 22.78r
|
83 |
|
|
gc k-nucleotide 16.08u 0.06s 16.50r
|
84 |
|
|
gc_B k-nucleotide 17.32u 0.02s 17.37r
|
85 |
|
|
|
86 |
|
|
mandelbrot 5500
|
87 |
|
|
# floating point code generator should use more registers
|
88 |
|
|
gcc -O2 mandelbrot.c 56.13u 0.02s 56.17r
|
89 |
|
|
gccgo -O2 mandelbrot.go 57.49u 0.01s 57.51r
|
90 |
|
|
gc mandelbrot 74.32u 0.00s 74.35r
|
91 |
|
|
gc_B mandelbrot 74.28u 0.01s 74.31r
|
92 |
|
|
|
93 |
|
|
meteor 2100
|
94 |
|
|
# we don't know
|
95 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
|
96 |
|
|
gccgo -O2 meteor-contest.go 0.12u 0.00s 0.14r
|
97 |
|
|
gc meteor-contest 0.24u 0.00s 0.26r
|
98 |
|
|
gc_B meteor-contest 0.23u 0.00s 0.24r
|
99 |
|
|
|
100 |
|
|
pidigits 10000
|
101 |
|
|
# bignum is slower than gmp
|
102 |
|
|
gcc -O2 pidigits.c -lgmp 2.60u 0.00s 2.62r
|
103 |
|
|
gc pidigits 77.69u 0.14s 78.18r
|
104 |
|
|
gc_B pidigits 74.26u 0.18s 75.41r
|
105 |
|
|
gc_B pidigits 68.48u 0.20s 69.31r # special case: no bounds checking in bignum
|
106 |
|
|
|
107 |
|
|
August 7 2009
|
108 |
|
|
|
109 |
|
|
# New gc does better division by powers of 2. Significant improvements:
|
110 |
|
|
|
111 |
|
|
spectral-norm 5500
|
112 |
|
|
# floating point code generator should use more registers; possibly inline evalA
|
113 |
|
|
gcc -O2 spectral-norm.c -lm 11.50u 0.00s 11.50r
|
114 |
|
|
gccgo -O2 spectral-norm.go 12.02u 0.00s 12.02r
|
115 |
|
|
gc spectral-norm 23.98u 0.00s 24.00r # new time is 0.48 times old time, 52% faster
|
116 |
|
|
gc_B spectral-norm 23.71u 0.01s 23.72r # ditto
|
117 |
|
|
gc spectral-norm-parallel 24.04u 0.00s 6.26r # /2 put back. note: 4x faster (on r70, idle)
|
118 |
|
|
|
119 |
|
|
k-nucleotide 1000000
|
120 |
|
|
# string maps are slower than glib string maps
|
121 |
|
|
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.82u 0.04s 10.87r
|
122 |
|
|
gccgo -O2 k-nucleotide.go 22.73u 0.89s 23.63r
|
123 |
|
|
gc k-nucleotide 15.97u 0.03s 16.04r
|
124 |
|
|
gc_B k-nucleotide 15.86u 0.06s 15.93r # 8.5% faster, but probably due to weird cache effects in previous version
|
125 |
|
|
|
126 |
|
|
pidigits 10000
|
127 |
|
|
# bignum is slower than gmp
|
128 |
|
|
gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.58r
|
129 |
|
|
gc pidigits 71.24u 0.04s 71.28r # 8.5% faster
|
130 |
|
|
gc_B pidigits 71.25u 0.03s 71.29r # 4% faster
|
131 |
|
|
|
132 |
|
|
threadring 50000000
|
133 |
|
|
gcc -O2 threadring.c -lpthread 35.51u 160.21s 199.50r
|
134 |
|
|
gccgo -O2 threadring.go 90.33u 459.95s 448.03r
|
135 |
|
|
gc threadring 33.11u 0.00s 33.14r
|
136 |
|
|
GOMAXPROCS=4 gc threadring 114.48u 226.65s 371.59r
|
137 |
|
|
# change wait code to do <-make(chan int) instead of time.Sleep
|
138 |
|
|
gc threadring 28.41u 0.01s 29.35r
|
139 |
|
|
GOMAXPROCS=4 gc threadring 112.59u 232.83s 384.72r
|
140 |
|
|
|
141 |
|
|
chameneos 6000000
|
142 |
|
|
gcc -O2 chameneosredux.c -lpthread 18.14u 276.52s 76.93r
|
143 |
|
|
gc chameneosredux 20.19u 0.01s 20.23r
|
144 |
|
|
|
145 |
|
|
Aug 10 2009
|
146 |
|
|
|
147 |
|
|
# new 6g with better fp registers, fast div and mod of integers
|
148 |
|
|
# complete set of timings listed. significant changes marked ***
|
149 |
|
|
|
150 |
|
|
fasta -n 25000000
|
151 |
|
|
# probably I/O library inefficiencies
|
152 |
|
|
gcc -O2 fasta.c 5.96u 0.00s 5.97r
|
153 |
|
|
gc fasta 10.59u 0.01s 10.61r
|
154 |
|
|
gc_B fasta 9.92u 0.02s 9.95r
|
155 |
|
|
|
156 |
|
|
reverse-complement < output-of-fasta-25000000
|
157 |
|
|
# we don't know - memory cache behavior?
|
158 |
|
|
gcc -O2 reverse-complement.c 1.96u 1.56s 16.23r
|
159 |
|
|
gccgo -O2 reverse-complement.go 6.41u 0.62s 7.05r
|
160 |
|
|
gc reverse-complement 6.46u 0.70s 7.17r
|
161 |
|
|
gc_B reverse-complement 6.22u 0.72s 6.95r
|
162 |
|
|
|
163 |
|
|
nbody 50000000
|
164 |
|
|
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
|
165 |
|
|
gcc -O2 nbody.c 21.26u 0.01s 21.28r
|
166 |
|
|
gccgo -O2 nbody.go 116.68u 0.07s 116.80r
|
167 |
|
|
gc nbody 86.64u 0.01s 86.68r # -14%
|
168 |
|
|
gc_B nbody 85.72u 0.02s 85.77r # *** -17%
|
169 |
|
|
|
170 |
|
|
binary-tree 15 # too slow to use 20
|
171 |
|
|
# memory allocation and garbage collection
|
172 |
|
|
gcc -O2 binary-tree.c -lm 0.87u 0.00s 0.87r
|
173 |
|
|
gccgo -O2 binary-tree.go 1.61u 0.47s 2.09r
|
174 |
|
|
gccgo -O2 binary-tree-freelist.go 0.00u 0.00s 0.01r
|
175 |
|
|
gc binary-tree 9.11u 0.01s 9.13r # *** -5%
|
176 |
|
|
gc binary-tree-freelist 0.47u 0.01s 0.48r
|
177 |
|
|
|
178 |
|
|
fannkuch 12
|
179 |
|
|
# bounds checking is half the difference
|
180 |
|
|
# rest might be registerization
|
181 |
|
|
gcc -O2 fannkuch.c 59.92u 0.00s 59.94r
|
182 |
|
|
gccgo -O2 fannkuch.go 65.54u 0.00s 65.58r
|
183 |
|
|
gc fannkuch 123.98u 0.01s 124.04r
|
184 |
|
|
gc_B fannkuch 90.75u 0.00s 90.78r
|
185 |
|
|
|
186 |
|
|
regex-dna 100000
|
187 |
|
|
# regexp code is slow on trivial regexp
|
188 |
|
|
gcc -O2 regex-dna.c -lpcre 0.91u 0.00s 0.92r
|
189 |
|
|
gc regex-dna 27.25u 0.02s 27.28r
|
190 |
|
|
gc_B regex-dna 29.51u 0.03s 29.55r
|
191 |
|
|
|
192 |
|
|
spectral-norm 5500
|
193 |
|
|
# possibly inline evalA
|
194 |
|
|
gcc -O2 spectral-norm.c -lm 11.57u 0.00s 11.57r
|
195 |
|
|
gccgo -O2 spectral-norm.go 12.07u 0.01s 12.08r
|
196 |
|
|
gc spectral-norm 23.99u 0.00s 24.00r
|
197 |
|
|
gc_B spectral-norm 23.73u 0.00s 23.75r
|
198 |
|
|
|
199 |
|
|
k-nucleotide 1000000
|
200 |
|
|
# string maps are slower than glib string maps
|
201 |
|
|
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.63u 0.02s 10.69r
|
202 |
|
|
gccgo -O2 k-nucleotide.go 23.19u 0.91s 24.12r
|
203 |
|
|
gc k-nucleotide 16.73u 0.04s 16.78r # *** +5% (but this one seems to vary by more than that)
|
204 |
|
|
gc_B k-nucleotide 16.46u 0.04s 16.51r # *** +5%
|
205 |
|
|
|
206 |
|
|
mandelbrot 16000
|
207 |
|
|
gcc -O2 mandelbrot.c 56.16u 0.00s 56.16r
|
208 |
|
|
gccgo -O2 mandelbrot.go 57.41u 0.01s 57.42r
|
209 |
|
|
gc mandelbrot 64.05u 0.02s 64.08r # *** -14%
|
210 |
|
|
gc_B mandelbrot 64.10u 0.02s 64.14r # *** -14%
|
211 |
|
|
|
212 |
|
|
meteor 2100
|
213 |
|
|
# we don't know
|
214 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
|
215 |
|
|
gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r
|
216 |
|
|
gc meteor-contest 0.18u 0.00s 0.20r # *** -25%
|
217 |
|
|
gc_B meteor-contest 0.17u 0.00s 0.18r # *** -24%
|
218 |
|
|
|
219 |
|
|
pidigits 10000
|
220 |
|
|
# bignum is slower than gmp
|
221 |
|
|
gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.57r
|
222 |
|
|
gc pidigits 71.82u 0.04s 71.89r
|
223 |
|
|
gc_B pidigits 71.84u 0.08s 71.98r
|
224 |
|
|
|
225 |
|
|
threadring 50000000
|
226 |
|
|
gcc -O2 threadring.c -lpthread 30.91u 164.33s 204.57r
|
227 |
|
|
gccgo -O2 threadring.go 87.12u 460.04s 447.61r
|
228 |
|
|
gc threadring 38.55u 0.00s 38.56r # *** +16%
|
229 |
|
|
|
230 |
|
|
chameneos 6000000
|
231 |
|
|
gcc -O2 chameneosredux.c -lpthread 17.93u 323.65s 88.47r
|
232 |
|
|
gc chameneosredux 21.72u 0.00s 21.73r
|
233 |
|
|
|
234 |
|
|
August 10 2009
|
235 |
|
|
|
236 |
|
|
# In-place versions for some bignum operations.
|
237 |
|
|
pidigits 10000
|
238 |
|
|
gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r
|
239 |
|
|
gc pidigits 55.22u 0.04s 55.29r # *** -23%
|
240 |
|
|
gc_B pidigits 55.49u 0.02s 55.60r # *** -23%
|
241 |
|
|
|
242 |
|
|
September 3 2009
|
243 |
|
|
|
244 |
|
|
# New 6g inlines slices, has a few other tweaks.
|
245 |
|
|
# Complete rerun. Significant changes marked.
|
246 |
|
|
|
247 |
|
|
fasta -n 25000000
|
248 |
|
|
# probably I/O library inefficiencies
|
249 |
|
|
gcc -O2 fasta.c 5.96u 0.00s 5.96r
|
250 |
|
|
gc fasta 10.63u 0.02s 10.66r
|
251 |
|
|
gc_B fasta 9.92u 0.01s 9.94r
|
252 |
|
|
|
253 |
|
|
reverse-complement < output-of-fasta-25000000
|
254 |
|
|
# we don't know - memory cache behavior?
|
255 |
|
|
gcc -O2 reverse-complement.c 1.92u 0.33s 2.93r
|
256 |
|
|
gccgo -O2 reverse-complement.go 6.76u 0.72s 7.58r # +5%
|
257 |
|
|
gc reverse-complement 6.59u 0.70s 7.29r # +2%
|
258 |
|
|
gc_B reverse-complement 5.57u 0.80s 6.37r # -10%
|
259 |
|
|
|
260 |
|
|
nbody 50000000
|
261 |
|
|
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
|
262 |
|
|
# also loop alignment appears to be critical
|
263 |
|
|
gcc -O2 nbody.c 21.28u 0.00s 21.28r
|
264 |
|
|
gccgo -O2 nbody.go 119.21u 0.00s 119.22r # +2%
|
265 |
|
|
gc nbody 109.72u 0.00s 109.78r # + 28% *****
|
266 |
|
|
gc_B nbody 85.90u 0.00s 85.91r
|
267 |
|
|
|
268 |
|
|
binary-tree 15 # too slow to use 20
|
269 |
|
|
# memory allocation and garbage collection
|
270 |
|
|
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
|
271 |
|
|
gccgo -O2 binary-tree.go 1.88u 0.54s 2.42r # +17%
|
272 |
|
|
gccgo -O2 binary-tree-freelist.go 0.01u 0.01s 0.02r
|
273 |
|
|
gc binary-tree 8.94u 0.01s 8.96r # -2%
|
274 |
|
|
gc binary-tree-freelist 0.47u 0.01s 0.48r
|
275 |
|
|
|
276 |
|
|
fannkuch 12
|
277 |
|
|
# bounds checking is half the difference
|
278 |
|
|
# rest might be registerization
|
279 |
|
|
gcc -O2 fannkuch.c 60.12u 0.00s 60.12r
|
280 |
|
|
gccgo -O2 fannkuch.go 92.62u 0.00s 92.66r # +41% ***
|
281 |
|
|
gc fannkuch 123.90u 0.00s 123.92r
|
282 |
|
|
gc_B fannkuch 89.71u 0.00s 89.74r # -1%
|
283 |
|
|
|
284 |
|
|
regex-dna 100000
|
285 |
|
|
# regexp code is slow on trivial regexp
|
286 |
|
|
gcc -O2 regex-dna.c -lpcre 0.88u 0.00s 0.88r
|
287 |
|
|
gc regex-dna 25.77u 0.01s 25.79r # -5%
|
288 |
|
|
gc_B regex-dna 26.05u 0.02s 26.09r # -12% ***
|
289 |
|
|
|
290 |
|
|
spectral-norm 5500
|
291 |
|
|
# possibly inline evalA
|
292 |
|
|
gcc -O2 spectral-norm.c -lm 11.51u 0.00s 11.51r
|
293 |
|
|
gccgo -O2 spectral-norm.go 11.95u 0.00s 11.96r
|
294 |
|
|
gc spectral-norm 24.23u 0.00s 24.23r
|
295 |
|
|
gc_B spectral-norm 23.83u 0.00s 23.84r
|
296 |
|
|
|
297 |
|
|
k-nucleotide 1000000
|
298 |
|
|
# string maps are slower than glib string maps
|
299 |
|
|
gcc -O2 -I/usr/include/glib-2.0 -I/usr/lib/glib-2.0/include k-nucleotide.c -lglib-2.0 10.68u 0.04s 10.72r
|
300 |
|
|
gccgo -O2 k-nucleotide.go 23.03u 0.88s 23.92r
|
301 |
|
|
gc k-nucleotide 15.79u 0.05s 15.85r # -5% (but this one seems to vary by more than that)
|
302 |
|
|
gc_B k-nucleotide 17.88u 0.05s 17.95r # +8% (ditto)
|
303 |
|
|
|
304 |
|
|
mandelbrot 16000
|
305 |
|
|
gcc -O2 mandelbrot.c 56.17u 0.02s 56.20r
|
306 |
|
|
gccgo -O2 mandelbrot.go 56.74u 0.02s 56.79r # -1%
|
307 |
|
|
gc mandelbrot 63.31u 0.01s 63.35r # -1%
|
308 |
|
|
gc_B mandelbrot 63.29u 0.00s 63.31r # -1%
|
309 |
|
|
|
310 |
|
|
meteor 2100
|
311 |
|
|
# we don't know
|
312 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
|
313 |
|
|
gccgo -O2 meteor-contest.go 0.11u 0.00s 0.12r
|
314 |
|
|
gc meteor-contest 0.18u 0.00s 0.19r
|
315 |
|
|
gc_B meteor-contest 0.17u 0.00s 0.18r
|
316 |
|
|
|
317 |
|
|
pidigits 10000
|
318 |
|
|
# bignum is slower than gmp
|
319 |
|
|
gcc -O2 pidigits.c -lgmp 2.56u 0.00s 2.57r
|
320 |
|
|
gc pidigits 55.87u 0.03s 55.91r
|
321 |
|
|
gc_B pidigits 55.93u 0.03s 55.99r
|
322 |
|
|
|
323 |
|
|
# these tests are compared using real time, since they run multiple processors
|
324 |
|
|
# accuracy probably low
|
325 |
|
|
threadring 50000000
|
326 |
|
|
gcc -O2 threadring.c -lpthread 26.31u 164.69s 199.92r # -2%
|
327 |
|
|
gccgo -O2 threadring.go 87.90u 487.26s 472.81r # +6%
|
328 |
|
|
gc threadring 28.89u 0.00s 28.90r # -25% ***
|
329 |
|
|
|
330 |
|
|
chameneos 6000000
|
331 |
|
|
gcc -O2 chameneosredux.c -lpthread 16.41u 296.91s 81.17r # -8%
|
332 |
|
|
gc chameneosredux 19.97u 0.00s 19.97r # -8%
|
333 |
|
|
|
334 |
|
|
Sep 22, 2009
|
335 |
|
|
|
336 |
|
|
# 6g inlines sliceslice in most cases.
|
337 |
|
|
|
338 |
|
|
fasta -n 25000000
|
339 |
|
|
# probably I/O library inefficiencies
|
340 |
|
|
gc fasta 10.24u 0.00s 10.25r # -4%
|
341 |
|
|
gc_B fasta 9.68u 0.01s 9.69r # -3%
|
342 |
|
|
|
343 |
|
|
reverse-complement < output-of-fasta-25000000
|
344 |
|
|
# we don't know - memory cache behavior?
|
345 |
|
|
gc reverse-complement 6.67u 0.69s 7.37r # +1%
|
346 |
|
|
gc_B reverse-complement 6.00u 0.64s 6.65r # +7%
|
347 |
|
|
|
348 |
|
|
nbody -n 50000000
|
349 |
|
|
# math.Sqrt needs to be in assembly; inlining is probably the other 50%
|
350 |
|
|
# also loop alignment appears to be critical
|
351 |
|
|
gc nbody 86.27u 0.00s 86.29r # -21%
|
352 |
|
|
gc_B nbody 104.52u 0.00s 104.54r # +22%
|
353 |
|
|
|
354 |
|
|
fannkuch 12
|
355 |
|
|
# bounds checking is half the difference
|
356 |
|
|
# rest might be registerization
|
357 |
|
|
gc fannkuch 128.36u 0.00s 128.37r # +4%
|
358 |
|
|
gc_B fannkuch 89.32u 0.00s 89.34r
|
359 |
|
|
|
360 |
|
|
regex-dna 100000
|
361 |
|
|
# regexp code is slow on trivial regexp
|
362 |
|
|
gc regex-dna 24.82u 0.01s 24.86r # -4%
|
363 |
|
|
gc_B regex-dna 24.55u 0.01s 24.57r # -6%
|
364 |
|
|
|
365 |
|
|
spectral-norm 5500
|
366 |
|
|
# possibly inline evalA
|
367 |
|
|
gc spectral-norm 24.05u 0.00s 24.07r # -1%
|
368 |
|
|
gc_B spectral-norm 23.60u 0.00s 23.65r # -1%
|
369 |
|
|
|
370 |
|
|
k-nucleotide 1000000
|
371 |
|
|
# string maps are slower than glib string maps
|
372 |
|
|
gc k-nucleotide 17.84u 0.04s 17.89r # +13% but mysterious variation continues
|
373 |
|
|
gc_B k-nucleotide 15.56u 0.08s 15.65r # -13% (ditto)
|
374 |
|
|
|
375 |
|
|
mandelbrot 16000
|
376 |
|
|
gc mandelbrot 64.08u 0.01s 64.11r # +1%
|
377 |
|
|
gc_B mandelbrot 64.04u 0.00s 64.05r # +1%
|
378 |
|
|
|
379 |
|
|
pidigits 10000
|
380 |
|
|
# bignum is slower than gmp
|
381 |
|
|
gc pidigits 58.68u 0.02s 58.72r # +5%
|
382 |
|
|
gc_B pidigits 58.86u 0.05s 58.99r # +5%
|
383 |
|
|
|
384 |
|
|
# these tests are compared using real time, since they run multiple processors
|
385 |
|
|
# accuracy probably low
|
386 |
|
|
threadring 50000000
|
387 |
|
|
gc threadring 32.70u 0.02s 32.77r # +13%
|
388 |
|
|
|
389 |
|
|
chameneos 6000000
|
390 |
|
|
gc chameneosredux 26.62u 0.00s 26.63r # +13%
|
391 |
|
|
|
392 |
|
|
Sep 24, 2009
|
393 |
|
|
|
394 |
|
|
# Sqrt now in assembler for 6g.
|
395 |
|
|
nbody -n 50000000
|
396 |
|
|
# remember, at least for 6g, alignment of loops may be important
|
397 |
|
|
gcc -O2 nbody.c 21.24u 0.00s 21.25r
|
398 |
|
|
gccgo -O2 nbody.go 121.03u 0.00s 121.04r
|
399 |
|
|
gc nbody 30.26u 0.00s 30.27r # -65% ***
|
400 |
|
|
gc_B nbody 30.20u 0.02s 30.22r # -72% ***
|
401 |
|
|
|
402 |
|
|
Nov 13 2009
|
403 |
|
|
|
404 |
|
|
# fix bug in regexp; take performance hit. good regexps will come in time.
|
405 |
|
|
regex-dna 100000
|
406 |
|
|
gcc -O2 regex-dna.c -lpcre 0.92u 0.00s 0.94r
|
407 |
|
|
gc regex-dna 29.78u 0.03s 29.83r
|
408 |
|
|
gc_B regex-dna 32.63u 0.03s 32.74r
|
409 |
|
|
|
410 |
|
|
Nov 24 2009
|
411 |
|
|
|
412 |
|
|
# Roger Peppe's rewrite of the benchmark
|
413 |
|
|
chameneos 6000000
|
414 |
|
|
gcc -O2 chameneosredux.c -lpthread 18.00u 303.29s 83.64r
|
415 |
|
|
gc chameneosredux 12.10u 0.00s 12.10r # 2.22X faster
|
416 |
|
|
|
417 |
|
|
Jan 6, 2010
|
418 |
|
|
|
419 |
|
|
# Long-overdue update. All numbers included in this complete run.
|
420 |
|
|
# Some programs (e.g. reverse-complement) rewritten for speed.
|
421 |
|
|
# Regular expressions much faster in common cases (although still far behind PCRE)
|
422 |
|
|
# Bignum stuff improved
|
423 |
|
|
# Better (but sometimes slower) locking in channels.
|
424 |
|
|
|
425 |
|
|
fasta -n 25000000
|
426 |
|
|
gcc -O2 fasta.c 5.99u 0.01s 6.00r
|
427 |
|
|
gc fasta 9.11u 0.00s 9.12r # -11%
|
428 |
|
|
gc_B fasta 8.60u 0.00s 8.62r # +12% ??
|
429 |
|
|
|
430 |
|
|
reverse-complement < output-of-fasta-25000000
|
431 |
|
|
gcc -O2 reverse-complement.c 2.00u 0.80s 9.54r
|
432 |
|
|
# gccgo -O2 reverse-complement.go 4.57u 0.35s 4.94r # 33% faster
|
433 |
|
|
gc reverse-complement 2.01u 0.38s 2.40r # 3.3X faster
|
434 |
|
|
gc_B reverse-complement 1.88u 0.36s 2.24r # 3.2X faster
|
435 |
|
|
GOGC=off
|
436 |
|
|
gc reverse-complement 2.01u 0.35s 2.37r
|
437 |
|
|
gc_B reverse-complement 1.86u 0.32s 2.19r
|
438 |
|
|
|
439 |
|
|
nbody -n 50000000
|
440 |
|
|
gcc -O2 nbody.c 21.28u 0.00s 21.31r
|
441 |
|
|
gccgo -O2 nbody.go 80.02u 0.00s 80.05r # 33% faster
|
442 |
|
|
gc nbody 30.13u 0.00s 30.13r
|
443 |
|
|
gc_B nbody 29.89u 0.01s 29.91r
|
444 |
|
|
|
445 |
|
|
binary-tree 15 # too slow to use 20
|
446 |
|
|
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.87r
|
447 |
|
|
gccgo -O2 binary-tree.go 4.82u 0.41s 5.24r # 2.5X slower
|
448 |
|
|
gc binary-tree 7.23u 0.01s 7.25r # -19%
|
449 |
|
|
gc binary-tree-freelist 0.43u 0.00s 0.44r # -9%
|
450 |
|
|
|
451 |
|
|
fannkuch 12
|
452 |
|
|
gcc -O2 fannkuch.c 60.17u 0.00s 60.17r
|
453 |
|
|
gccgo -O2 fannkuch.go 78.47u 0.01s 78.49r
|
454 |
|
|
gc fannkuch 128.86u 0.00s 128.96r
|
455 |
|
|
gc_B fannkuch 90.17u 0.00s 90.21r
|
456 |
|
|
|
457 |
|
|
regex-dna 100000
|
458 |
|
|
gcc -O2 regex-dna.c -lpcre 0.90u 0.00s 0.92r
|
459 |
|
|
gc regex-dna 9.48u 0.01s 9.50r # 3.1X faster
|
460 |
|
|
gc_B regex-dna 9.08u 0.00s 9.10r # 3.6X faster
|
461 |
|
|
|
462 |
|
|
spectral-norm 5500
|
463 |
|
|
gcc -O2 spectral-norm.c -lm 11.48u 0.00s 11.48r
|
464 |
|
|
gccgo -O2 spectral-norm.go 11.68u 0.00s 11.70r
|
465 |
|
|
gc spectral-norm 23.98u 0.00s 23.99r
|
466 |
|
|
gc_B spectral-norm 23.68u 0.00s 23.69r
|
467 |
|
|
|
468 |
|
|
k-nucleotide 1000000
|
469 |
|
|
gcc -O2 k-nucleotide.c 10.85u 0.04s 10.90r
|
470 |
|
|
gccgo -O2 k-nucleotide.go 25.26u 0.87s 26.14r
|
471 |
|
|
gc k-nucleotide 15.28u 0.06s 15.37r # restored; mysterious variation continues
|
472 |
|
|
gc_B k-nucleotide 15.97u 0.03s 16.00r
|
473 |
|
|
|
474 |
|
|
mandelbrot 16000
|
475 |
|
|
gcc -O2 mandelbrot.c 56.12u 0.01s 56.15r
|
476 |
|
|
gccgo -O2 mandelbrot.go 56.86u 0.01s 56.89r
|
477 |
|
|
gc mandelbrot 66.05u 0.00s 66.07r # -3%
|
478 |
|
|
gc_B mandelbrot 66.06u 0.00s 66.07r # -3%
|
479 |
|
|
|
480 |
|
|
meteor 2100
|
481 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
|
482 |
|
|
gccgo -O2 meteor-contest.go 0.12u 0.00s 0.12r
|
483 |
|
|
gc meteor-contest 0.17u 0.00s 0.17r
|
484 |
|
|
gc_B meteor-contest 0.15u 0.00s 0.16r
|
485 |
|
|
|
486 |
|
|
pidigits 10000
|
487 |
|
|
gcc -O2 pidigits.c -lgmp 2.57u 0.00s 2.59r
|
488 |
|
|
gc pidigits 38.27u 0.02s 38.30r # 1.5X faster
|
489 |
|
|
gc_B pidigits 38.27u 0.02s 38.31r # 1.5X faster
|
490 |
|
|
|
491 |
|
|
threadring 50000000
|
492 |
|
|
gcc -O2 threadring.c 37.11u 170.59s 212.75r
|
493 |
|
|
gccgo -O2 threadring.go 89.67u 447.56s 442.55r # -6.5%
|
494 |
|
|
gc threadring 36.08u 0.04s 36.15r # +10%
|
495 |
|
|
|
496 |
|
|
chameneos 6000000
|
497 |
|
|
gcc -O2 chameneosredux.c -lpthread 19.02u 331.08s 90.79r
|
498 |
|
|
gc chameneosredux 12.54u 0.00s 12.55r
|
499 |
|
|
|
500 |
|
|
Oct 19, 2010
|
501 |
|
|
|
502 |
|
|
# Another long-overdue update. Some of the code is new; parallel versions
|
503 |
|
|
# of some are added. A few significant improvements.
|
504 |
|
|
|
505 |
|
|
fasta -n 25000000
|
506 |
|
|
gcc -O2 fasta.c 4.92u 0.00s 4.93r
|
507 |
|
|
gccgo -O2 fasta.go 3.31u 0.00s 3.34r # new code
|
508 |
|
|
gc fasta 3.68u 0.00s 3.69r # 2.5X faster with no code
|
509 |
|
|
gc_B fasta 3.68u 0.00s 3.69r # 2.3X faster with no code
|
510 |
|
|
|
511 |
|
|
reverse-complement < output-of-fasta-25000000
|
512 |
|
|
gcc -O2 reverse-complement.c 1.93u 0.81s 11.24r
|
513 |
|
|
gccgo -O2 reverse-complement.go 1.58u 0.43s 2.04r # first run with new code?
|
514 |
|
|
gc reverse-complement 1.84u 0.34s 2.20r # 10% faster
|
515 |
|
|
gc_B reverse-complement 1.85u 0.32s 2.18r
|
516 |
|
|
|
517 |
|
|
nbody -n 50000000
|
518 |
|
|
gcc -O2 nbody.c 21.35u 0.00s 21.36r
|
519 |
|
|
gccgo -O2 nbody.go 21.62u 0.00s 21.66r # 3.7X faster - why??
|
520 |
|
|
gc nbody 29.78u 0.00s 29.79r
|
521 |
|
|
gc_B nbody 29.72u 0.00s 29.72r
|
522 |
|
|
|
523 |
|
|
binary-tree 15 # too slow to use 20
|
524 |
|
|
gcc -O2 binary-tree.c -lm 0.86u 0.00s 0.88r
|
525 |
|
|
gccgo -O2 binary-tree.go 4.05u 0.02s 4.08r # 28% faster
|
526 |
|
|
gccgo -O2 binary-tree-freelist 0.34u 0.08s 0.34r
|
527 |
|
|
gc binary-tree 5.94u 0.00s 5.95r # 20% faster
|
528 |
|
|
gc binary-tree-freelist 0.50u 0.01s 0.54r
|
529 |
|
|
|
530 |
|
|
fannkuch 12
|
531 |
|
|
gcc -O2 fannkuch.c 60.45u 0.00s 60.45r
|
532 |
|
|
gccgo -O2 fannkuch.go 64.64u 0.00s 64.64r
|
533 |
|
|
gccgo -O2 fannkuch-parallel.go 115.63u 0.00s 31.58r
|
534 |
|
|
gc fannkuch 126.52u 0.04s 126.68r
|
535 |
|
|
gc fannkuch-parallel 238.82u 0.10s 65.93r # GOMAXPROCS=4
|
536 |
|
|
gc_B fannkuch 88.99u 0.00s 89.02r
|
537 |
|
|
|
538 |
|
|
regex-dna 100000
|
539 |
|
|
gcc -O2 regex-dna.c -lpcre 0.89u 0.00s 0.89r
|
540 |
|
|
gc regex-dna 8.99u 0.02s 9.03r
|
541 |
|
|
gc regex-dna-parallel 8.94u 0.02s 3.68r # GOMAXPROCS=4
|
542 |
|
|
gc_B regex-dna 9.12u 0.00s 9.14r
|
543 |
|
|
|
544 |
|
|
spectral-norm 5500
|
545 |
|
|
gcc -O2 spectral-norm.c -lm 11.55u 0.00s 11.57r
|
546 |
|
|
gccgo -O2 spectral-norm.go 11.73u 0.00s 11.75r
|
547 |
|
|
gc spectral-norm 23.74u 0.00s 23.79r
|
548 |
|
|
gc_B spectral-norm 24.49u 0.02s 24.54r
|
549 |
|
|
|
550 |
|
|
k-nucleotide 1000000
|
551 |
|
|
gcc -O2 k-nucleotide.c 11.44u 0.06s 11.50r
|
552 |
|
|
gccgo -O2 k-nucleotide.go 8.65u 0.04s 8.71r
|
553 |
|
|
gccgo -O2 k-nucleotide-parallel.go 8.75u 0.03s 2.97r # set GOMAXPROCS=4
|
554 |
|
|
gc k-nucleotide 14.92u 0.05s 15.01r
|
555 |
|
|
gc k-nucleotide-parallel 16.96u 0.06s 6.53r # set GOMAXPROCS=4
|
556 |
|
|
gc_B k-nucleotide 15.97u 0.03s 16.08r
|
557 |
|
|
|
558 |
|
|
mandelbrot 16000
|
559 |
|
|
gcc -O2 mandelbrot.c 56.32u 0.00s 56.35r
|
560 |
|
|
gccgo -O2 mandelbrot.go 55.62u 0.02s 55.77r
|
561 |
|
|
gc mandelbrot 64.85u 0.01s 64.94r
|
562 |
|
|
gc_B mandelbrot 65.02u 0.01s 65.14r
|
563 |
|
|
|
564 |
|
|
meteor 2100
|
565 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.10r
|
566 |
|
|
gccgo -O2 meteor-contest.go 0.10u 0.00s 0.11r
|
567 |
|
|
gc meteor-contest 0.17u 0.00s 0.18r
|
568 |
|
|
gc_B meteor-contest 0.16u 0.00s 0.16r
|
569 |
|
|
|
570 |
|
|
pidigits 10000
|
571 |
|
|
gcc -O2 pidigits.c -lgmp 2.58u 0.00s 2.59r
|
572 |
|
|
gccgo -O2 pidigits.go 14.06u 0.01s 14.09r # first run?
|
573 |
|
|
gc pidigits 8.47u 0.05s 8.55r # 4.5X faster due to package big
|
574 |
|
|
gc_B pidigits 8.33u 0.01s 8.36r # 4.5X faster due to package big
|
575 |
|
|
|
576 |
|
|
threadring 50000000
|
577 |
|
|
gcc -O2 threadring.c 28.18u 153.19s 186.47r
|
578 |
|
|
gccgo -O2 threadring.go 110.10u 516.48s 515.25r
|
579 |
|
|
gc threadring 40.39u 0.00s 40.40r
|
580 |
|
|
|
581 |
|
|
chameneos 6000000
|
582 |
|
|
gcc -O2 chameneosredux.c -lpthread 18.20u 301.55s 83.10r
|
583 |
|
|
gccgo -O2 chameneosredux.go 52.22u 324.54s 201.21r
|
584 |
|
|
gc chameneosredux 13.52u 0.00s 13.54r
|
585 |
|
|
|
586 |
|
|
Dec 14, 2010
|
587 |
|
|
|
588 |
|
|
# Improved regex code (same algorithm) gets ~30%.
|
589 |
|
|
|
590 |
|
|
regex-dna 100000
|
591 |
|
|
gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r
|
592 |
|
|
gc regex-dna 6.80u 0.00s 6.81r
|
593 |
|
|
gc regex-dna-parallel 6.82u 0.01s 2.75r
|
594 |
|
|
gc_B regex-dna 6.69u 0.02s 6.70r
|
595 |
|
|
|
596 |
|
|
Feb 15, 2011
|
597 |
|
|
|
598 |
|
|
# Improved GC, still single-threaded but more efficient
|
599 |
|
|
|
600 |
|
|
fasta -n 25000000
|
601 |
|
|
gcc -O2 fasta.c 3.40u 0.00s 3.40r
|
602 |
|
|
gccgo -O2 fasta.go 3.51u 0.00s 3.50r
|
603 |
|
|
gc fasta 3.66u 0.01s 3.66r
|
604 |
|
|
gc_B fasta 3.66u 0.00s 3.66r
|
605 |
|
|
|
606 |
|
|
reverse-complement < output-of-fasta-25000000
|
607 |
|
|
gcc -O2 reverse-complement.c 1.86u 1.29s 4.93r
|
608 |
|
|
gccgo -O2 reverse-complement.go 2.18u 0.41s 2.60r
|
609 |
|
|
gc reverse-complement 1.67u 0.48s 2.15r
|
610 |
|
|
gc_B reverse-complement 1.71u 0.45s 2.15r
|
611 |
|
|
|
612 |
|
|
nbody -n 50000000
|
613 |
|
|
gcc -O2 -lm nbody.c 21.64u 0.00s 21.64r
|
614 |
|
|
gccgo -O2 nbody.go 21.46u 0.00s 21.45r
|
615 |
|
|
gc nbody 29.07u 0.00s 29.06r
|
616 |
|
|
gc_B nbody 31.61u 0.00s 31.61r
|
617 |
|
|
|
618 |
|
|
binary-tree 15 # too slow to use 20
|
619 |
|
|
gcc -O2 binary-tree.c -lm 0.88u 0.00s 0.87r
|
620 |
|
|
gccgo -O2 binary-tree.go 2.74u 0.07s 2.81r
|
621 |
|
|
gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r
|
622 |
|
|
gc binary-tree 4.22u 0.02s 4.24r
|
623 |
|
|
gc binary-tree-freelist 0.54u 0.02s 0.55r
|
624 |
|
|
|
625 |
|
|
fannkuch 12
|
626 |
|
|
gcc -O2 fannkuch.c 57.64u 0.00s 57.64r
|
627 |
|
|
gccgo -O2 fannkuch.go 65.79u 0.00s 65.82r
|
628 |
|
|
gccgo -O2 fannkuch-parallel.go 160.91u 0.02s 43.90r
|
629 |
|
|
gc fannkuch 126.36u 0.03s 126.53r
|
630 |
|
|
gc fannkuch-parallel 175.23u 0.04s 45.49r
|
631 |
|
|
gc_B fannkuch 89.23u 0.00s 89.24r
|
632 |
|
|
|
633 |
|
|
regex-dna 100000
|
634 |
|
|
gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.80r
|
635 |
|
|
gccgo -O2 regex-dna.go 12.38u 0.10s 12.52r
|
636 |
|
|
gccgo -O2 regex-dna-parallel.go 43.96u 4.64s 15.11r
|
637 |
|
|
gc regex-dna 7.03u 0.01s 7.05r
|
638 |
|
|
gc regex-dna-parallel 6.85u 0.05s 2.70r
|
639 |
|
|
gc_B regex-dna 6.87u 0.02s 6.89r
|
640 |
|
|
|
641 |
|
|
spectral-norm 5500
|
642 |
|
|
gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r
|
643 |
|
|
gccgo -O2 spectral-norm.go 11.79u 0.00s 11.79r
|
644 |
|
|
gc spectral-norm 24.00u 0.02s 24.05r
|
645 |
|
|
gc_B spectral-norm 24.59u 0.01s 24.59r
|
646 |
|
|
|
647 |
|
|
k-nucleotide 1000000
|
648 |
|
|
gcc -O2 k-nucleotide.c 9.75u 0.07s 9.82r
|
649 |
|
|
gccgo -O2 k-nucleotide.go 8.92u 0.06s 8.98r
|
650 |
|
|
gccgo -O2 k-nucleotide-parallel.go 8.40u 0.04s 2.76r
|
651 |
|
|
gc k-nucleotide 17.01u 0.03s 17.04r
|
652 |
|
|
gc k-nucleotide-parallel 16.51u 0.08s 6.21r
|
653 |
|
|
gc_B k-nucleotide 16.94u 0.08s 17.02r
|
654 |
|
|
|
655 |
|
|
mandelbrot 16000
|
656 |
|
|
gcc -O2 mandelbrot.c 54.60u 0.00s 54.66r
|
657 |
|
|
gccgo -O2 mandelbrot.go 59.38u 0.00s 59.41r
|
658 |
|
|
gc mandelbrot 64.93u 0.04s 65.08r
|
659 |
|
|
gc_B mandelbrot 64.85u 0.03s 64.92r
|
660 |
|
|
|
661 |
|
|
meteor 2098
|
662 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.01s 0.10r
|
663 |
|
|
gccgo -O2 meteor-contest.go 0.11u 0.00s 0.11r
|
664 |
|
|
gc meteor-contest 0.18u 0.00s 0.17r
|
665 |
|
|
gc_B meteor-contest 0.17u 0.00s 0.16r
|
666 |
|
|
|
667 |
|
|
pidigits 10000
|
668 |
|
|
gcc -O2 pidigits.c -lgmp 2.24u 0.00s 2.23r
|
669 |
|
|
gccgo -O2 pidigits.go 14.05u 0.00s 14.06r
|
670 |
|
|
gc pidigits 6.34u 0.05s 6.38r
|
671 |
|
|
gc_B pidigits 6.37u 0.02s 6.38r
|
672 |
|
|
|
673 |
|
|
threadring 50000000
|
674 |
|
|
gcc -O2 threadring.c 30.50u 258.05s 325.72r
|
675 |
|
|
gccgo -O2 threadring.go 92.87u 748.39s 728.46r
|
676 |
|
|
gc threadring 38.03u 0.01s 38.04r
|
677 |
|
|
|
678 |
|
|
# Apr 15, 2011
|
679 |
|
|
# Move to new machine, Intel Xeon E5520@2.27GHz.
|
680 |
|
|
# (Was Opteron(tm) Processor 8214 HE)
|
681 |
|
|
|
682 |
|
|
fasta -n 25000000
|
683 |
|
|
OLD:
|
684 |
|
|
gcc -O2 fasta.c 3.39u 0.04s 3.42r
|
685 |
|
|
gccgo -O2 fasta.go 3.52u 0.00s 3.52r
|
686 |
|
|
gc fasta 3.63u 0.04s 3.67r
|
687 |
|
|
gc_B fasta 3.66u 0.00s 3.66r
|
688 |
|
|
NEW:
|
689 |
|
|
gcc -O2 fasta.c 1.45u 0.02s 1.47r
|
690 |
|
|
gccgo -O2 fasta.go 1.51u 0.01s 1.51r
|
691 |
|
|
gc fasta 2.04u 0.00s 2.04r
|
692 |
|
|
gc_B fasta 2.05u 0.00s 2.04r
|
693 |
|
|
|
694 |
|
|
reverse-complement < output-of-fasta-25000000
|
695 |
|
|
OLD:
|
696 |
|
|
gcc -O2 reverse-complement.c 1.87u 1.51s 7.02r
|
697 |
|
|
gccgo -O2 reverse-complement.go 1.56u 0.54s 3.37r
|
698 |
|
|
gc reverse-complement 1.73u 0.36s 2.08r
|
699 |
|
|
gc_B reverse-complement 1.75u 0.37s 2.12r
|
700 |
|
|
NEW:
|
701 |
|
|
gcc -O2 reverse-complement.c 1.20u 0.47s 12.96r
|
702 |
|
|
gccgo -O2 reverse-complement.go 0.88u 0.14s 1.01r
|
703 |
|
|
gc reverse-complement 1.13u 0.17s 1.30r
|
704 |
|
|
gc_B reverse-complement 1.11u 0.09s 1.20r
|
705 |
|
|
|
706 |
|
|
nbody -n 50000000
|
707 |
|
|
OLD:
|
708 |
|
|
gcc -O2 -lm nbody.c 21.90u 0.00s 21.92r
|
709 |
|
|
gccgo -O2 nbody.go 23.12u 0.03s 23.19r
|
710 |
|
|
gc nbody 29.07u 0.00s 29.07r
|
711 |
|
|
gc_B nbody 31.84u 0.00s 31.85r
|
712 |
|
|
NEW:
|
713 |
|
|
gcc -O2 -lm nbody.c 13.01u 0.00s 13.03r
|
714 |
|
|
gccgo -O2 nbody.go 13.35u 0.00s 13.37r
|
715 |
|
|
gc nbody 21.78u 0.00s 21.82r
|
716 |
|
|
gc_B nbody 21.72u 0.00s 21.76r
|
717 |
|
|
|
718 |
|
|
binary-tree 15 # too slow to use 20
|
719 |
|
|
OLD:
|
720 |
|
|
gcc -O2 binary-tree.c -lm 0.83u 0.02s 0.84r
|
721 |
|
|
gccgo -O2 binary-tree.go 2.61u 0.02s 2.62r
|
722 |
|
|
gccgo -O2 binary-tree-freelist.go 0.32u 0.01s 0.32r
|
723 |
|
|
gc binary-tree 3.93u 0.04s 3.97r
|
724 |
|
|
gc binary-tree-freelist 0.47u 0.03s 0.50r
|
725 |
|
|
NEW:
|
726 |
|
|
gcc -O2 binary-tree.c -lm 0.60u 0.00s 0.59r
|
727 |
|
|
gccgo -O2 binary-tree.go 1.53u 0.00s 1.52r
|
728 |
|
|
gccgo -O2 binary-tree-freelist.go 0.01u 0.00s 0.00r
|
729 |
|
|
gc binary-tree 1.93u 0.02s 1.95r
|
730 |
|
|
gc binary-tree-freelist 0.32u 0.01s 0.32r
|
731 |
|
|
|
732 |
|
|
fannkuch 12
|
733 |
|
|
OLD:
|
734 |
|
|
gcc -O2 fannkuch.c 57.64u 0.00s 57.64r
|
735 |
|
|
gccgo -O2 fannkuch.go 65.56u 0.01s 65.65r
|
736 |
|
|
gccgo -O2 fannkuch-parallel.go 179.12u 0.00s 49.82r
|
737 |
|
|
gc fannkuch 126.39u 0.00s 126.39r
|
738 |
|
|
gc fannkuch-parallel 172.49u 0.02s 45.44r
|
739 |
|
|
gc_B fannkuch 89.30u 0.00s 89.28r
|
740 |
|
|
NEW:
|
741 |
|
|
gcc -O2 fannkuch.c 45.17u 0.00s 45.26r
|
742 |
|
|
gccgo -O2 fannkuch.go 53.63u 0.00s 53.73r
|
743 |
|
|
gccgo -O2 fannkuch-parallel.go 216.72u 0.00s 58.42r
|
744 |
|
|
gc fannkuch 108.21u 0.00s 108.44r
|
745 |
|
|
gc fannkuch-parallel 227.20u 0.00s 57.27r
|
746 |
|
|
gc_B fannkuch 56.14u 0.00s 56.26r
|
747 |
|
|
|
748 |
|
|
regex-dna 100000
|
749 |
|
|
OLD:
|
750 |
|
|
gcc -O2 regex-dna.c -lpcre 0.77u 0.01s 0.78r
|
751 |
|
|
gccgo -O2 regex-dna.go 10.15u 0.02s 10.23r
|
752 |
|
|
gccgo -O2 regex-dna-parallel.go 33.81u 3.22s 11.62r
|
753 |
|
|
gc regex-dna 6.52u 0.04s 6.56r
|
754 |
|
|
gc regex-dna-parallel 6.84u 0.03s 2.70r
|
755 |
|
|
gc_B regex-dna 6.83u 0.01s 6.84r
|
756 |
|
|
NEW:
|
757 |
|
|
gcc -O2 regex-dna.c -lpcre 0.47u 0.00s 0.47r
|
758 |
|
|
gccgo -O2 regex-dna.go 6.00u 0.00s 6.00r
|
759 |
|
|
gccgo -O2 regex-dna-parallel.go 44.54u 1.57s 6.51r
|
760 |
|
|
gc regex-dna 5.41u 0.01s 5.42r
|
761 |
|
|
gc regex-dna-parallel 5.62u 0.01s 2.20r
|
762 |
|
|
gc_B regex-dna 5.50u 0.00s 5.50r
|
763 |
|
|
|
764 |
|
|
spectral-norm 5500
|
765 |
|
|
OLD:
|
766 |
|
|
gcc -O2 spectral-norm.c -lm 12.29u 0.00s 12.28r
|
767 |
|
|
gccgo -O2 spectral-norm.go 11.56u 0.00s 11.55r
|
768 |
|
|
gc spectral-norm 23.98u 0.00s 24.00r
|
769 |
|
|
gc_B spectral-norm 24.62u 0.00s 24.65r
|
770 |
|
|
NEW:
|
771 |
|
|
gcc -O2 spectral-norm.c -lm 15.79u 0.00s 15.82r
|
772 |
|
|
gccgo -O2 spectral-norm.go 15.32u 0.00s 15.35r
|
773 |
|
|
gc spectral-norm 19.62u 0.01s 19.67r
|
774 |
|
|
gc_B spectral-norm 19.62u 0.00s 19.66r
|
775 |
|
|
|
776 |
|
|
k-nucleotide 1000000
|
777 |
|
|
OLD:
|
778 |
|
|
gcc -O2 k-nucleotide.c 9.82u 0.06s 9.87r
|
779 |
|
|
gccgo -O2 k-nucleotide.go 8.30u 0.02s 8.32r
|
780 |
|
|
gccgo -O2 k-nucleotide-parallel.go 8.84u 0.05s 3.02r
|
781 |
|
|
gc k-nucleotide 15.38u 0.07s 15.44r
|
782 |
|
|
gc k-nucleotide-parallel 16.40u 0.03s 5.93r
|
783 |
|
|
gc_B k-nucleotide 15.19u 0.05s 15.23r
|
784 |
|
|
NEW:
|
785 |
|
|
gcc -O2 k-nucleotide.c 4.88u 0.03s 4.92r
|
786 |
|
|
gccgo -O2 k-nucleotide.go 5.94u 0.01s 5.96r
|
787 |
|
|
gccgo -O2 k-nucleotide-parallel.go 6.44u 0.03s 1.47r
|
788 |
|
|
gc k-nucleotide 9.61u 0.01s 9.63r
|
789 |
|
|
gc k-nucleotide-parallel 9.70u 0.00s 3.39r
|
790 |
|
|
gc_B k-nucleotide 9.19u 0.03s 9.23r
|
791 |
|
|
|
792 |
|
|
mandelbrot 16000
|
793 |
|
|
OLD:
|
794 |
|
|
gcc -O2 mandelbrot.c 54.54u 0.00s 54.56r
|
795 |
|
|
gccgo -O2 mandelbrot.go 59.63u 0.03s 59.67r
|
796 |
|
|
gc mandelbrot 64.82u 0.00s 64.83r
|
797 |
|
|
gc_B mandelbrot 64.84u 0.00s 64.91r
|
798 |
|
|
NEW:
|
799 |
|
|
gcc -O2 mandelbrot.c 36.07u 0.01s 36.15r
|
800 |
|
|
gccgo -O2 mandelbrot.go 43.57u 0.00s 43.66r
|
801 |
|
|
gc mandelbrot 60.66u 0.00s 60.79r
|
802 |
|
|
gc_B mandelbrot 60.90u 0.00s 61.03r
|
803 |
|
|
|
804 |
|
|
meteor 2098
|
805 |
|
|
OLD:
|
806 |
|
|
gcc -O2 meteor-contest.c 0.11u 0.00s 0.10r
|
807 |
|
|
gccgo -O2 meteor-contest.go 0.10u 0.01s 0.10r
|
808 |
|
|
gc meteor-contest 0.18u 0.00s 0.17r
|
809 |
|
|
gc_B meteor-contest 0.17u 0.00s 0.16r
|
810 |
|
|
NEW:
|
811 |
|
|
gcc -O2 meteor-contest.c 0.10u 0.00s 0.09r
|
812 |
|
|
gccgo -O2 meteor-contest.go 0.10u 0.00s 0.09r
|
813 |
|
|
gc meteor-contest 0.14u 0.00s 0.14r
|
814 |
|
|
gc_B meteor-contest 0.13u 0.00s 0.13r
|
815 |
|
|
|
816 |
|
|
pidigits 10000
|
817 |
|
|
OLD:
|
818 |
|
|
gcc -O2 pidigits.c -lgmp 2.22u 0.00s 2.21r
|
819 |
|
|
gccgo -O2 pidigits.go 13.39u 0.00s 13.40r
|
820 |
|
|
gc pidigits 6.42u 0.04s 6.45r
|
821 |
|
|
gc_B pidigits 6.45u 0.02s 6.47r
|
822 |
|
|
NEW:
|
823 |
|
|
gcc -O2 pidigits.c -lgmp 2.27u 0.00s 2.29r
|
824 |
|
|
gccgo -O2 pidigits.go 9.21u 0.00s 9.22r
|
825 |
|
|
gc pidigits 3.60u 0.00s 3.60r
|
826 |
|
|
gc_B pidigits 3.56u 0.02s 3.58r
|
827 |
|
|
|
828 |
|
|
threadring 50000000
|
829 |
|
|
OLD:
|
830 |
|
|
gcc -O2 threadring.c -lpthread 34.51u 267.95s 336.12r
|
831 |
|
|
gccgo -O2 threadring.go 103.51u 588.57s 627.16r
|
832 |
|
|
gc threadring 54.68u 0.00s 54.73r
|
833 |
|
|
NEW:
|
834 |
|
|
gcc -O2 threadring.c -lpthread 32.00u 259.39s 369.74r
|
835 |
|
|
gccgo -O2 threadring.go 133.06u 546.02s 595.33r
|
836 |
|
|
gc threadring 16.75u 0.02s 16.80r
|
837 |
|
|
|
838 |
|
|
chameneos 6000000
|
839 |
|
|
OLD:
|
840 |
|
|
gcc -O2 chameneosredux.c -lpthread 12.65u 31.02s 13.33r
|
841 |
|
|
gccgo -O2 chameneosredux.go 47.04u 302.84s 252.29r
|
842 |
|
|
gc chameneosredux 14.14u 0.00s 14.14r
|
843 |
|
|
NEW:
|
844 |
|
|
gcc -O2 chameneosredux.c -lpthread 8.05u 63.43s 11.16r
|
845 |
|
|
gccgo -O2 chameneosredux.go 82.95u 304.37s 207.64r
|
846 |
|
|
gc chameneosredux 9.42u 0.00s 9.43r
|
847 |
|
|
|
848 |
|
|
# May 13, 2011
|
849 |
|
|
# after gc update to inline append when possible - 35% faster
|
850 |
|
|
|
851 |
|
|
regex-dna 100000
|
852 |
|
|
gc regex-dna 3.94u 0.00s 3.95r
|
853 |
|
|
gc regex-dna-parallel 4.15u 0.01s 1.63r
|
854 |
|
|
gc_B regex-dna 4.01u 0.01s 4.02r
|
855 |
|
|
|
856 |
|
|
# Aug 4, 2011
|
857 |
|
|
# After various updates to locking code and some runtime changes.
|
858 |
|
|
# Slowdowns believed to be due to slower (but more correct) memmove.
|
859 |
|
|
|
860 |
|
|
fannkuch 12
|
861 |
|
|
gccgo -O2 fannkuch.go 51.59u 0.00s 51.69r # -4%
|
862 |
|
|
gccgo -O2 fannkuch-parallel.go 253.17u 0.00s 64.67r # -11%
|
863 |
|
|
gc fannkuch 103.14u 0.00s 103.36r # -5%
|
864 |
|
|
gc fannkuch-parallel 189.63u 0.00s 49.37r # +9%
|
865 |
|
|
gc_B fannkuch 49.19u 0.00s 49.29r # -14%
|
866 |
|
|
|
867 |
|
|
regex-dna 100000
|
868 |
|
|
gc regex-dna 3.78u 0.00s 3.78r # -43%
|
869 |
|
|
gc regex-dna-parallel 3.84u 0.02s 1.48r # -49%
|
870 |
|
|
gc_B regex-dna 3.62u 0.00s 3.63r # -52%
|
871 |
|
|
|
872 |
|
|
k-nucleotide 1000000
|
873 |
|
|
gc k-nucleotide 12.23u 0.02s 12.27r # +27%
|
874 |
|
|
gc k-nucleotide-parallel 12.76u 0.02s 4.37r # +29%
|
875 |
|
|
gc_B k-nucleotide 12.18u 0.01s 12.21r # +33%
|
876 |
|
|
|
877 |
|
|
threadring 50000000
|
878 |
|
|
gc threadring 17.49u 0.00s 17.53r # +4%
|
879 |
|
|
|
880 |
|
|
chameneos 6000000
|
881 |
|
|
gc chameneosredux 7.61u 0.00s 7.63r # -24%
|
882 |
|
|
|
883 |
|
|
# Aug 9, 2011
|
884 |
|
|
# After custom algorithms for 1-, 2-, 4-, and 8-byte scalars.
|
885 |
|
|
|
886 |
|
|
fannkuch 12
|
887 |
|
|
gc fannkuch-parallel 157.17u 0.00s 41.08r # -17%
|
888 |
|
|
|
889 |
|
|
k-nucleotide 1000000
|
890 |
|
|
gc k-nucleotide 8.72u 0.03s 8.76r # -39%
|
891 |
|
|
gc k-nucleotide-parallel 8.79u 0.01s 3.14r # -39%
|
892 |
|
|
gc_B k-nucleotide 8.65u 0.03s 8.69r # -39%
|
893 |
|
|
|
894 |
|
|
pidigits 10000
|
895 |
|
|
gc pidigits 3.71u 0.02s 3.73r # +4%
|
896 |
|
|
gc_B pidigits 3.73u 0.00s 3.73r # +4%
|
897 |
|
|
|
898 |
|
|
threadring 50000000
|
899 |
|
|
gc threadring 14.51u 0.00s 14.54r # -17%
|
900 |
|
|
|
901 |
|
|
chameneos 6000000
|
902 |
|
|
gc chameneosredux 7.41u 0.00s 7.42r # -3%
|