blob: 88b1a293456bb4f8a04db3b67744d1c8a57b0a21 [file] [log] [blame]
Neil Booth2a967f32001-05-20 06:26:45 +00001/* Hash tables.
Andreas Jaeger1d088de2003-07-06 08:15:36 +02002 Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
Neil Booth2a967f32001-05-20 06:26:45 +00003
4This program is free software; you can redistribute it and/or modify it
5under the terms of the GNU General Public License as published by the
6Free Software Foundation; either version 2, or (at your option) any
7later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
15along with this program; if not, write to the Free Software
16Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding! */
21
22#include "config.h"
23#include "system.h"
Zack Weinberg4977bab2002-12-16 18:23:00 +000024#include "coretypes.h"
25#include "tm.h"
Neil Booth2a967f32001-05-20 06:26:45 +000026#include "hashtable.h"
27
28/* The code below is a specialization of Vladimir Makarov's expandable
29 hash tables (see libiberty/hashtab.c). The abstraction penalty was
30 too high to continue using the generic form. This code knows
31 intrinsically how to calculate a hash value, and how to compare an
32 existing entry with a potential new one. Also, the ability to
33 delete members from the table has been removed. */
34
Andreas Jaeger1d088de2003-07-06 08:15:36 +020035static unsigned int calc_hash (const unsigned char *, unsigned int);
36static void ht_expand (hash_table *);
Neil Booth2a967f32001-05-20 06:26:45 +000037
/* Calculate the hash of the string STR of length LEN.

   Every byte is folded into an unsigned accumulator, which wraps
   modulo the width of unsigned int, and LEN is added at the end.
   STR does not have to be NUL-terminated; exactly LEN bytes are read.  */

static unsigned int
calc_hash (const unsigned char *str, unsigned int len)
{
  unsigned int n = len;
  unsigned int r = 0;
/* No trailing semicolon, so that the macro expands to a plain
   expression and can be used wherever an expression is allowed.  */
#define HASHSTEP(r, c) ((r) * 67 + ((c) - 113))

  while (n--)
    r = HASHSTEP (r, *str++);

  return r + len;
#undef HASHSTEP
}
53
54/* Initialize an identifier hashtable. */
55
56hash_table *
Andreas Jaeger1d088de2003-07-06 08:15:36 +020057ht_create (unsigned int order)
Neil Booth2a967f32001-05-20 06:26:45 +000058{
59 unsigned int nslots = 1 << order;
60 hash_table *table;
61
62 table = (hash_table *) xmalloc (sizeof (hash_table));
63 memset (table, 0, sizeof (hash_table));
64
65 /* Strings need no alignment. */
66 gcc_obstack_init (&table->stack);
67 obstack_alignment_mask (&table->stack) = 0;
68
69 table->entries = (hashnode *) xcalloc (nslots, sizeof (hashnode));
70 table->nslots = nslots;
71 return table;
72}
73
Neil Boothbef985f2001-08-11 12:37:19 +000074/* Frees all memory associated with a hash table. */
75
76void
Andreas Jaeger1d088de2003-07-06 08:15:36 +020077ht_destroy (hash_table *table)
Neil Boothbef985f2001-08-11 12:37:19 +000078{
79 obstack_free (&table->stack, NULL);
80 free (table->entries);
81 free (table);
82}
83
Neil Booth2a967f32001-05-20 06:26:45 +000084/* Returns the hash entry for the a STR of length LEN. If that string
85 already exists in the table, returns the existing entry, and, if
86 INSERT is CPP_ALLOCED, frees the last obstack object. If the
87 identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88 returns NULL. Otherwise insert and returns a new entry. A new
89 string is alloced if INSERT is CPP_ALLOC, otherwise INSERT is
90 CPP_ALLOCED and the item is assumed to be at the top of the
91 obstack. */
92hashnode
Andreas Jaeger1d088de2003-07-06 08:15:36 +020093ht_lookup (hash_table *table, const unsigned char *str, unsigned int len,
94 enum ht_lookup_option insert)
Neil Booth2a967f32001-05-20 06:26:45 +000095{
96 unsigned int hash = calc_hash (str, len);
97 unsigned int hash2;
98 unsigned int index;
99 size_t sizemask;
100 hashnode node;
101
102 sizemask = table->nslots - 1;
103 index = hash & sizemask;
104
105 /* hash2 must be odd, so we're guaranteed to visit every possible
106 location in the table during rehashing. */
107 hash2 = ((hash * 17) & sizemask) | 1;
108 table->searches++;
109
110 for (;;)
111 {
112 node = table->entries[index];
113
114 if (node == NULL)
115 break;
116
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000117 if (node->hash_value == hash && HT_LEN (node) == len
118 && !memcmp (HT_STR (node), str, len))
Neil Booth2a967f32001-05-20 06:26:45 +0000119 {
120 if (insert == HT_ALLOCED)
121 /* The string we search for was placed at the end of the
122 obstack. Release it. */
Kaveh R. Ghazifad205f2003-06-16 21:41:10 +0000123 obstack_free (&table->stack, (void *) str);
Neil Booth2a967f32001-05-20 06:26:45 +0000124 return node;
125 }
126
127 index = (index + hash2) & sizemask;
128 table->collisions++;
129 }
130
131 if (insert == HT_NO_INSERT)
132 return NULL;
133
134 node = (*table->alloc_node) (table);
135 table->entries[index] = node;
136
137 HT_LEN (node) = len;
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000138 node->hash_value = hash;
Neil Booth2a967f32001-05-20 06:26:45 +0000139 if (insert == HT_ALLOC)
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000140 HT_STR (node) = obstack_copy0 (&table->stack, str, len);
Neil Booth2a967f32001-05-20 06:26:45 +0000141 else
142 HT_STR (node) = str;
143
144 if (++table->nelements * 4 >= table->nslots * 3)
145 /* Must expand the string table. */
146 ht_expand (table);
147
148 return node;
149}
150
151/* Double the size of a hash table, re-hashing existing entries. */
152
153static void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200154ht_expand (hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000155{
156 hashnode *nentries, *p, *limit;
157 unsigned int size, sizemask;
158
159 size = table->nslots * 2;
160 nentries = (hashnode *) xcalloc (size, sizeof (hashnode));
161 sizemask = size - 1;
162
163 p = table->entries;
164 limit = p + table->nslots;
165 do
166 if (*p)
167 {
168 unsigned int index, hash, hash2;
169
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000170 hash = (*p)->hash_value;
Neil Booth2a967f32001-05-20 06:26:45 +0000171 hash2 = ((hash * 17) & sizemask) | 1;
172 index = hash & sizemask;
173
174 for (;;)
175 {
176 if (! nentries[index])
177 {
178 nentries[index] = *p;
179 break;
180 }
181
182 index = (index + hash2) & sizemask;
183 }
184 }
185 while (++p < limit);
186
187 free (table->entries);
188 table->entries = nentries;
189 table->nslots = size;
190}
191
192/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
193 the node, and V. */
194void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200195ht_forall (hash_table *table, ht_cb cb, const void *v)
Neil Booth2a967f32001-05-20 06:26:45 +0000196{
197 hashnode *p, *limit;
198
199 p = table->entries;
200 limit = p + table->nslots;
201 do
202 if (*p)
203 {
204 if ((*cb) (table->pfile, *p, v) == 0)
205 break;
206 }
207 while (++p < limit);
208}
209
210/* Dump allocation statistics to stderr. */
211
212void
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200213ht_dump_statistics (hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000214{
215 size_t nelts, nids, overhead, headers;
216 size_t total_bytes, longest, sum_of_squares;
217 double exp_len, exp_len2, exp2_len;
218 hashnode *p, *limit;
219
220#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
221 ? (x) \
222 : ((x) < 1024*1024*10 \
223 ? (x) / 1024 \
224 : (x) / (1024*1024))))
225#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
226
227 total_bytes = longest = sum_of_squares = nids = 0;
228 p = table->entries;
229 limit = p + table->nslots;
230 do
231 if (*p)
232 {
233 size_t n = HT_LEN (*p);
234
235 total_bytes += n;
236 sum_of_squares += n * n;
237 if (n > longest)
238 longest = n;
239 nids++;
240 }
241 while (++p < limit);
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200242
Neil Booth2a967f32001-05-20 06:26:45 +0000243 nelts = table->nelements;
244 overhead = obstack_memory_used (&table->stack) - total_bytes;
245 headers = table->nslots * sizeof (hashnode);
246
247 fprintf (stderr, "\nString pool\nentries\t\t%lu\n",
248 (unsigned long) nelts);
249 fprintf (stderr, "identifiers\t%lu (%.2f%%)\n",
250 (unsigned long) nids, nids * 100.0 / nelts);
251 fprintf (stderr, "slots\t\t%lu\n",
252 (unsigned long) table->nslots);
253 fprintf (stderr, "bytes\t\t%lu%c (%lu%c overhead)\n",
254 SCALE (total_bytes), LABEL (total_bytes),
255 SCALE (overhead), LABEL (overhead));
256 fprintf (stderr, "table size\t%lu%c\n",
257 SCALE (headers), LABEL (headers));
258
259 exp_len = (double)total_bytes / (double)nelts;
260 exp2_len = exp_len * exp_len;
261 exp_len2 = (double) sum_of_squares / (double) nelts;
262
263 fprintf (stderr, "coll/search\t%.4f\n",
264 (double) table->collisions / (double) table->searches);
265 fprintf (stderr, "ins/search\t%.4f\n",
266 (double) nelts / (double) table->searches);
267 fprintf (stderr, "avg. entry\t%.2f bytes (+/- %.2f)\n",
268 exp_len, approx_sqrt (exp_len2 - exp2_len));
269 fprintf (stderr, "longest entry\t%lu\n",
270 (unsigned long) longest);
271#undef SCALE
272#undef LABEL
273}
274
/* Return the approximate positive square root of X.  This is for
   statistical reports, not code generation.

   Newton's iteration is started at X and repeated until the size of
   a step drops to .0001 or below.  Aborts if X is negative; returns
   0 for an X of 0.  */
double
approx_sqrt (double x)
{
  double s, d;

  if (x < 0)
    abort ();
  if (x == 0)
    return 0;

  s = x;
  do
    {
      d = (s * s - x) / (2 * s);
      s -= d;
    }
  /* For 0 < X < 1 the first step is negative (the start value X lies
     below the root), so the magnitude of the step has to be tested;
     checking only D > .0001 would stop after a single iteration.  */
  while (d > .0001 || d < -.0001);
  return s;
}
295}