1 |
24 |
jeremybenn |
/* Include file cached obstack implementation.
|
2 |
|
|
Written by Fred Fish <fnf@cygnus.com>
|
3 |
|
|
Rewritten by Jim Blandy <jimb@cygnus.com>
|
4 |
|
|
|
5 |
|
|
Copyright (C) 1999, 2000, 2002, 2003, 2007, 2008
|
6 |
|
|
Free Software Foundation, Inc.
|
7 |
|
|
|
8 |
|
|
This file is part of GDB.
|
9 |
|
|
|
10 |
|
|
This program is free software; you can redistribute it and/or modify
|
11 |
|
|
it under the terms of the GNU General Public License as published by
|
12 |
|
|
the Free Software Foundation; either version 3 of the License, or
|
13 |
|
|
(at your option) any later version.
|
14 |
|
|
|
15 |
|
|
This program is distributed in the hope that it will be useful,
|
16 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
18 |
|
|
GNU General Public License for more details.
|
19 |
|
|
|
20 |
|
|
You should have received a copy of the GNU General Public License
|
21 |
|
|
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
22 |
|
|
|
23 |
|
|
#ifndef BCACHE_H
|
24 |
|
|
#define BCACHE_H 1
|
25 |
|
|
|
26 |
|
|
/* A bcache is a data structure for factoring out duplication in
|
27 |
|
|
read-only structures. You give the bcache some string of bytes S.
|
28 |
|
|
If the bcache already contains a copy of S, it hands you back a
|
29 |
|
|
pointer to its copy. Otherwise, it makes a fresh copy of S, and
|
30 |
|
|
hands you back a pointer to that. In either case, you can throw
|
31 |
|
|
away your copy of S, and use the bcache's.
|
32 |
|
|
|
33 |
|
|
The "strings" in question are arbitrary strings of bytes --- they
|
34 |
|
|
can contain zero bytes. You pass in the length explicitly when you
|
35 |
|
|
call the bcache function.
|
36 |
|
|
|
37 |
|
|
This means that you can put ordinary C objects in a bcache.
|
38 |
|
|
However, if you do this, remember that structs can contain `holes'
|
39 |
|
|
between members, added for alignment. These bytes usually contain
|
40 |
|
|
garbage. If you try to bcache two objects which are identical from
|
41 |
|
|
your code's point of view, but have different garbage values in the
|
42 |
|
|
structure's holes, then the bcache will treat them as separate
|
43 |
|
|
strings, and you won't get the nice elimination of duplicates you
|
44 |
|
|
were hoping for. So, remember to memset your structures full of
|
45 |
|
|
zeros before bcaching them!
|
46 |
|
|
|
47 |
|
|
You shouldn't modify the strings you get from a bcache, because:
|
48 |
|
|
|
49 |
|
|
- You don't necessarily know who you're sharing space with. If I
|
50 |
|
|
stick eight bytes of text in a bcache, and then stick an eight-byte
|
51 |
|
|
structure in the same bcache, there's no guarantee those two
|
52 |
|
|
objects don't actually comprise the same sequence of bytes. If
|
53 |
|
|
they happen to, the bcache will use a single byte string for both
|
54 |
|
|
of them. Then, modifying the structure will change the string. In
|
55 |
|
|
bizarre ways.
|
56 |
|
|
|
57 |
|
|
- Even if you know for some other reason that all that's okay,
|
58 |
|
|
there's another problem. A bcache stores all its strings in a hash
|
59 |
|
|
table. If you modify a string's contents, you will probably change
|
60 |
|
|
its hash value. This means that the modified string is now in the
|
61 |
|
|
wrong place in the hash table, and future bcache probes will never
|
62 |
|
|
find it. So by mutating a string, you give up any chance of
|
63 |
|
|
sharing its space with future duplicates.
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
Size of bcache VS hashtab:
|
67 |
|
|
|
68 |
|
|
For bcache, the most critical cost is size (or more exactly the
|
69 |
|
|
overhead added by the bcache). It turns out that the bcache is
|
70 |
|
|
remarkably efficient.
|
71 |
|
|
|
72 |
|
|
Assuming a 32-bit system (the hash table slots are 4 bytes),
|
73 |
|
|
ignoring alignment, and limit strings to 255 bytes (1 byte length)
|
74 |
|
|
we get ...
|
75 |
|
|
|
76 |
|
|
bcache: This uses a separate linked list to track the hash chain.
|
77 |
|
|
The numbers show roughly 100% occupancy of the hash table and an
|
78 |
|
|
average chain length of 4. Spreading the slot cost over the 4
|
79 |
|
|
chain elements:
|
80 |
|
|
|
81 |
|
|
4 (slot) / 4 (chain length) + 1 (length) + 4 (chain) = 6 bytes
|
82 |
|
|
|
83 |
|
|
hashtab: This uses a more traditional re-hash algorithm where the
|
84 |
|
|
chain is maintained within the hash table. The table occupancy is
|
85 |
|
|
kept below 75% but we'll assume its perfect:
|
86 |
|
|
|
87 |
|
|
4 (slot) x 4/3 (occupancy) + 1 (length) = 6 1/3 bytes
|
88 |
|
|
|
89 |
|
|
So a perfect hashtab has just slightly larger than an average
|
90 |
|
|
bcache.
|
91 |
|
|
|
92 |
|
|
It turns out that an average hashtab is far worse. Two things
|
93 |
|
|
hurt:
|
94 |
|
|
|
95 |
|
|
- Hashtab's occupancy is more like 50% (it ranges between 38% and
|
96 |
|
|
75%) giving a per slot cost of 4x2 vs 4x4/3.
|
97 |
|
|
|
98 |
|
|
- the string structure needs to be aligned to 8 bytes which for
|
99 |
|
|
hashtab wastes 7 bytes, while for bcache wastes only 3.
|
100 |
|
|
|
101 |
|
|
This gives:
|
102 |
|
|
|
103 |
|
|
hashtab: 4 x 2 + 1 + 7 = 16 bytes
|
104 |
|
|
|
105 |
|
|
bcache 4 / 4 + 1 + 4 + 3 = 9 bytes
|
106 |
|
|
|
107 |
|
|
The numbers of GDB debugging GDB support this. ~40% vs ~70% overhead.
|
108 |
|
|
|
109 |
|
|
|
110 |
|
|
Speed of bcache VS hashtab (the half hash hack):
|
111 |
|
|
|
112 |
|
|
While hashtab has a typical chain length of 1, bcache has a chain
|
113 |
|
|
length of round 4. This means that the bcache will require
|
114 |
|
|
something like double the number of compares after that initial
|
115 |
|
|
hash. In both cases the comparison takes the form:
|
116 |
|
|
|
117 |
|
|
a.length == b.length && memcmp (a.data, b.data, a.length) == 0
|
118 |
|
|
|
119 |
|
|
That is lengths are checked before doing the memcmp.
|
120 |
|
|
|
121 |
|
|
For GDB debugging GDB, it turned out that all lengths were 24 bytes
|
122 |
|
|
(no C++ so only psymbols were cached) and hence, all compares
|
123 |
|
|
required a call to memcmp. As a hack, two bytes of padding
|
124 |
|
|
(mentioned above) are used to store the upper 16 bits of the
|
125 |
|
|
string's hash value and then that is used in the comparison vis:
|
126 |
|
|
|
127 |
|
|
a.half_hash == b.half_hash && a.length == b.length && memcmp
|
128 |
|
|
(a.data, b.data, a.length)
|
129 |
|
|
|
130 |
|
|
The numbers from GDB debugging GDB show this to be a remarkable
|
131 |
|
|
100% effective (only necessary length and memcmp tests being
|
132 |
|
|
performed).
|
133 |
|
|
|
134 |
|
|
Mind you, looking at the wall clock, the same GDB debugging GDB
|
135 |
|
|
showed only marginal speed up (0.780 vs 0.773s). Seems GDB is too
|
136 |
|
|
busy doing something else :-(
|
137 |
|
|
|
138 |
|
|
*/
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
struct bcache;
|
142 |
|
|
|
143 |
|
|
/* Find a copy of the LENGTH bytes at ADDR in BCACHE. If BCACHE has
|
144 |
|
|
never seen those bytes before, add a copy of them to BCACHE. In
|
145 |
|
|
either case, return a pointer to BCACHE's copy of that string.
|
146 |
|
|
Since the cached value is ment to be read-only, return a const
|
147 |
|
|
buffer. */
|
148 |
|
|
extern void *deprecated_bcache (const void *addr, int length,
|
149 |
|
|
struct bcache *bcache);
|
150 |
|
|
extern const void *bcache (const void *addr, int length,
|
151 |
|
|
struct bcache *bcache);
|
152 |
|
|
|
153 |
|
|
/* Free all the storage used by BCACHE. */
|
154 |
|
|
extern void bcache_xfree (struct bcache *bcache);
|
155 |
|
|
|
156 |
|
|
/* Create a new bcache object. */
|
157 |
|
|
extern struct bcache *bcache_xmalloc (void);
|
158 |
|
|
|
159 |
|
|
/* Print statistics on BCACHE's memory usage and efficacity at
|
160 |
|
|
eliminating duplication. TYPE should be a string describing the
|
161 |
|
|
kind of data BCACHE holds. Statistics are printed using
|
162 |
|
|
`printf_filtered' and its ilk. */
|
163 |
|
|
extern void print_bcache_statistics (struct bcache *bcache, char *type);
|
164 |
|
|
extern int bcache_memory_used (struct bcache *bcache);
|
165 |
|
|
|
166 |
|
|
/* The hash function */
|
167 |
|
|
extern unsigned long hash(const void *addr, int length);
|
168 |
|
|
|
169 |
|
|
#endif /* BCACHE_H */
|