blob: 39cecedf72f39cd8ad668e797a4105cc0ece5c75 [file] [log] [blame]
/* Hash tables.
   Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

 In other words, you are welcome to use, share and improve this program.
 You are forbidden to forbid anyone else to use, share and improve
 what you give them.   Help stamp out software-hoarding!  */
21
22#include "config.h"
23#include "system.h"
Paolo Bonzini4f4e53dd2004-05-24 10:50:45 +000024#include "symtab.h"
Neil Booth2a967f32001-05-20 06:26:45 +000025
/* The code below is a specialization of Vladimir Makarov's expandable
   hash tables (see libiberty/hashtab.c).  The abstraction penalty was
   too high to continue using the generic form.  This code knows
   intrinsically how to calculate a hash value, and how to compare an
   existing entry with a potential new one.  Also, the ability to
   delete members from the table has been removed.  */
32
Roger Sayle7bb3fbb2003-08-08 20:23:06 +000033static unsigned int calc_hash (const unsigned char *, size_t);
Andreas Jaeger1d088de2003-07-06 08:15:36 +020034static void ht_expand (hash_table *);
Zack Weinberga2f7be92003-07-22 16:24:53 +000035static double approx_sqrt (double);
Neil Booth2a967f32001-05-20 06:26:45 +000036
/* Calculate the hash of the string STR of length LEN.

   Each byte C is folded into the running value R as R * 67 + (C - 113),
   and LEN is added at the end.  The arithmetic is unsigned and wraps;
   the final sum is converted to unsigned int on return.  */

static unsigned int
calc_hash (const unsigned char *str, size_t len)
{
  size_t n = len;
  unsigned int r = 0;
/* No trailing semicolon: the macro must expand to a plain expression
   so that it can be used anywhere an expression is allowed.  */
#define HASHSTEP(r, c) ((r) * 67 + ((c) - 113))

  while (n--)
    r = HASHSTEP (r, *str++);

  return r + len;
#undef HASHSTEP
}
52
53/* Initialize an identifier hashtable. */
54
55hash_table *
Andreas Jaeger1d088de2003-07-06 08:15:36 +020056ht_create (unsigned int order)
Neil Booth2a967f32001-05-20 06:26:45 +000057{
58 unsigned int nslots = 1 << order;
59 hash_table *table;
60
Kaveh R. Ghazi29da5c92003-08-11 21:47:39 +000061 table = xcalloc (1, sizeof (hash_table));
Neil Booth2a967f32001-05-20 06:26:45 +000062
63 /* Strings need no alignment. */
Zack Weinberg43839642003-07-13 17:34:18 +000064 _obstack_begin (&table->stack, 0, 0,
65 (void *(*) (long)) xmalloc,
66 (void (*) (void *)) free);
67
Neil Booth2a967f32001-05-20 06:26:45 +000068 obstack_alignment_mask (&table->stack) = 0;
69
Kaveh R. Ghazi703ad42b2003-07-19 14:47:15 +000070 table->entries = xcalloc (nslots, sizeof (hashnode));
Neil Booth2a967f32001-05-20 06:26:45 +000071 table->nslots = nslots;
72 return table;
73}
74
Neil Boothbef985f2001-08-11 12:37:19 +000075/* Frees all memory associated with a hash table. */
76
77void
Andreas Jaeger1d088de2003-07-06 08:15:36 +020078ht_destroy (hash_table *table)
Neil Boothbef985f2001-08-11 12:37:19 +000079{
80 obstack_free (&table->stack, NULL);
81 free (table->entries);
82 free (table);
83}
84
Neil Booth2a967f32001-05-20 06:26:45 +000085/* Returns the hash entry for the a STR of length LEN. If that string
86 already exists in the table, returns the existing entry, and, if
87 INSERT is CPP_ALLOCED, frees the last obstack object. If the
88 identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
89 returns NULL. Otherwise insert and returns a new entry. A new
90 string is alloced if INSERT is CPP_ALLOC, otherwise INSERT is
91 CPP_ALLOCED and the item is assumed to be at the top of the
92 obstack. */
93hashnode
Roger Sayle7bb3fbb2003-08-08 20:23:06 +000094ht_lookup (hash_table *table, const unsigned char *str, size_t len,
Andreas Jaeger1d088de2003-07-06 08:15:36 +020095 enum ht_lookup_option insert)
Neil Booth2a967f32001-05-20 06:26:45 +000096{
97 unsigned int hash = calc_hash (str, len);
98 unsigned int hash2;
99 unsigned int index;
100 size_t sizemask;
101 hashnode node;
102
103 sizemask = table->nslots - 1;
104 index = hash & sizemask;
Neil Booth2a967f32001-05-20 06:26:45 +0000105 table->searches++;
106
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000107 node = table->entries[index];
108
109 if (node != NULL)
Neil Booth2a967f32001-05-20 06:26:45 +0000110 {
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000111 if (node->hash_value == hash
112 && HT_LEN (node) == (unsigned int) len
113 && !memcmp (HT_STR (node), str, len))
Neil Booth2a967f32001-05-20 06:26:45 +0000114 {
115 if (insert == HT_ALLOCED)
116 /* The string we search for was placed at the end of the
117 obstack. Release it. */
Kaveh R. Ghazifad205f2003-06-16 21:41:10 +0000118 obstack_free (&table->stack, (void *) str);
Neil Booth2a967f32001-05-20 06:26:45 +0000119 return node;
120 }
121
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000122 /* hash2 must be odd, so we're guaranteed to visit every possible
123 location in the table during rehashing. */
124 hash2 = ((hash * 17) & sizemask) | 1;
125
126 for (;;)
127 {
128 table->collisions++;
129 index = (index + hash2) & sizemask;
130 node = table->entries[index];
131 if (node == NULL)
132 break;
133
134 if (node->hash_value == hash
135 && HT_LEN (node) == (unsigned int) len
136 && !memcmp (HT_STR (node), str, len))
137 {
138 if (insert == HT_ALLOCED)
139 /* The string we search for was placed at the end of the
140 obstack. Release it. */
141 obstack_free (&table->stack, (void *) str);
142 return node;
143 }
144 }
Neil Booth2a967f32001-05-20 06:26:45 +0000145 }
146
147 if (insert == HT_NO_INSERT)
148 return NULL;
149
150 node = (*table->alloc_node) (table);
151 table->entries[index] = node;
152
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000153 HT_LEN (node) = (unsigned int) len;
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000154 node->hash_value = hash;
Neil Booth2a967f32001-05-20 06:26:45 +0000155 if (insert == HT_ALLOC)
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000156 HT_STR (node) = obstack_copy0 (&table->stack, str, len);
Neil Booth2a967f32001-05-20 06:26:45 +0000157 else
158 HT_STR (node) = str;
159
160 if (++table->nelements * 4 >= table->nslots * 3)
161 /* Must expand the string table. */
162 ht_expand (table);
163
164 return node;
165}
166
167/* Double the size of a hash table, re-hashing existing entries. */
168
169static void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200170ht_expand (hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000171{
172 hashnode *nentries, *p, *limit;
173 unsigned int size, sizemask;
174
175 size = table->nslots * 2;
Kaveh R. Ghazi703ad42b2003-07-19 14:47:15 +0000176 nentries = xcalloc (size, sizeof (hashnode));
Neil Booth2a967f32001-05-20 06:26:45 +0000177 sizemask = size - 1;
178
179 p = table->entries;
180 limit = p + table->nslots;
181 do
182 if (*p)
183 {
184 unsigned int index, hash, hash2;
185
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000186 hash = (*p)->hash_value;
Neil Booth2a967f32001-05-20 06:26:45 +0000187 index = hash & sizemask;
188
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000189 if (nentries[index])
Neil Booth2a967f32001-05-20 06:26:45 +0000190 {
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000191 hash2 = ((hash * 17) & sizemask) | 1;
192 do
Neil Booth2a967f32001-05-20 06:26:45 +0000193 {
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000194 index = (index + hash2) & sizemask;
Neil Booth2a967f32001-05-20 06:26:45 +0000195 }
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000196 while (nentries[index]);
Neil Booth2a967f32001-05-20 06:26:45 +0000197 }
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000198 nentries[index] = *p;
Neil Booth2a967f32001-05-20 06:26:45 +0000199 }
200 while (++p < limit);
201
202 free (table->entries);
203 table->entries = nentries;
204 table->nslots = size;
205}
206
207/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
208 the node, and V. */
209void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200210ht_forall (hash_table *table, ht_cb cb, const void *v)
Neil Booth2a967f32001-05-20 06:26:45 +0000211{
212 hashnode *p, *limit;
213
214 p = table->entries;
215 limit = p + table->nslots;
216 do
217 if (*p)
218 {
219 if ((*cb) (table->pfile, *p, v) == 0)
220 break;
221 }
222 while (++p < limit);
223}
224
225/* Dump allocation statistics to stderr. */
226
227void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200228ht_dump_statistics (hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000229{
230 size_t nelts, nids, overhead, headers;
231 size_t total_bytes, longest, sum_of_squares;
232 double exp_len, exp_len2, exp2_len;
233 hashnode *p, *limit;
234
235#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
236 ? (x) \
237 : ((x) < 1024*1024*10 \
238 ? (x) / 1024 \
239 : (x) / (1024*1024))))
240#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
241
242 total_bytes = longest = sum_of_squares = nids = 0;
243 p = table->entries;
244 limit = p + table->nslots;
245 do
246 if (*p)
247 {
248 size_t n = HT_LEN (*p);
249
250 total_bytes += n;
251 sum_of_squares += n * n;
252 if (n > longest)
253 longest = n;
254 nids++;
255 }
256 while (++p < limit);
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200257
Neil Booth2a967f32001-05-20 06:26:45 +0000258 nelts = table->nelements;
259 overhead = obstack_memory_used (&table->stack) - total_bytes;
260 headers = table->nslots * sizeof (hashnode);
261
262 fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
263 (unsigned long) nelts);
264 fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
265 (unsigned long) nids, nids * 100.0 / nelts);
266 fprintf (stderr, "slots\t\t%lu\n",
267 (unsigned long) table->nslots);
268 fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
269 SCALE (total_bytes), LABEL (total_bytes),
270 SCALE (overhead), LABEL (overhead));
271 fprintf (stderr, "table size\t%lu%c\n",
272 SCALE (headers), LABEL (headers));
273
274 exp_len = (double)total_bytes / (double)nelts;
275 exp2_len = exp_len * exp_len;
276 exp_len2 = (double) sum_of_squares / (double) nelts;
277
278 fprintf (stderr, "coll/search\t%.4f\n",
279 (double) table->collisions / (double) table->searches);
280 fprintf (stderr, "ins/search\t%.4f\n",
281 (double) nelts / (double) table->searches);
282 fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
283 exp_len, approx_sqrt (exp_len2 - exp2_len));
284 fprintf (stderr, "longest entry\t%lu\n",
285 (unsigned long) longest);
286#undef SCALE
287#undef LABEL
288}
289
/* Return the approximate positive square root of a number X.  This is
   for statistical reports, not code generation.

   Newton's iteration is started from X itself and repeated until the
   magnitude of the correction is no larger than .0001.  Aborts if X is
   negative; returns 0 when X is 0.  */
static double
approx_sqrt (double x)
{
  double s, d;

  if (x < 0)
    abort ();
  if (x == 0)
    return 0;

  s = x;
  do
    {
      d = (s * s - x) / (2 * s);
      s -= d;
    }
  /* Test the magnitude of the step, not just its positive size: for
     0 < X < 1 the first correction is negative, and checking only
     D > .0001 would stop after one iteration with a badly wrong
     estimate (e.g. 0.625 for X == 0.25).  */
  while (d > .0001 || d < -.0001);
  return s;
}