blob: 9a2fae0f78d2d40fdc647f85db5e9c1f01398d02 [file] [log] [blame]
Neil Booth2a967f32001-05-20 06:26:45 +00001/* Hash tables.
Thomas Koenigb18a97e2021-09-13 19:49:49 +02002 Copyright (C) 2000-2021 Free Software Foundation, Inc.
Neil Booth2a967f32001-05-20 06:26:45 +00003
4This program is free software; you can redistribute it and/or modify it
5under the terms of the GNU General Public License as published by the
Jakub Jelinek748086b2009-04-09 17:00:19 +02006Free Software Foundation; either version 3, or (at your option) any
Neil Booth2a967f32001-05-20 06:26:45 +00007later version.
8
9This program is distributed in the hope that it will be useful,
10but WITHOUT ANY WARRANTY; without even the implied warranty of
11MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12GNU General Public License for more details.
13
14You should have received a copy of the GNU General Public License
Jakub Jelinek748086b2009-04-09 17:00:19 +020015along with this program; see the file COPYING3. If not see
16<http://www.gnu.org/licenses/>.
Neil Booth2a967f32001-05-20 06:26:45 +000017
18 In other words, you are welcome to use, share and improve this program.
19 You are forbidden to forbid anyone else to use, share and improve
20 what you give them. Help stamp out software-hoarding! */
21
22#include "config.h"
23#include "system.h"
Paolo Bonzini4f4e53dd2004-05-24 10:50:45 +000024#include "symtab.h"
Neil Booth2a967f32001-05-20 06:26:45 +000025
26/* The code below is a specialization of Vladimir Makarov's expandable
27 hash tables (see libiberty/hashtab.c). The abstraction penalty was
28 too high to continue using the generic form. This code knows
29 intrinsically how to calculate a hash value, and how to compare an
Tom Tromeydae41742008-05-21 15:00:59 +000030 existing entry with a potential new one. */
Neil Booth2a967f32001-05-20 06:26:45 +000031
Roger Sayle7bb3fbb2003-08-08 20:23:06 +000032static unsigned int calc_hash (const unsigned char *, size_t);
Diego Novillo0823efe2012-08-14 21:56:07 -040033static void ht_expand (cpp_hash_table *);
Zack Weinberga2f7be92003-07-22 16:24:53 +000034static double approx_sqrt (double);
Neil Booth2a967f32001-05-20 06:26:45 +000035
Tom Tromeydae41742008-05-21 15:00:59 +000036/* A deleted entry. */
37#define DELETED ((hashnode) -1)
38
Neil Booth2a967f32001-05-20 06:26:45 +000039/* Calculate the hash of the string STR of length LEN. */
40
41static unsigned int
Roger Sayle7bb3fbb2003-08-08 20:23:06 +000042calc_hash (const unsigned char *str, size_t len)
Neil Booth2a967f32001-05-20 06:26:45 +000043{
Roger Sayle7bb3fbb2003-08-08 20:23:06 +000044 size_t n = len;
Neil Booth2a967f32001-05-20 06:26:45 +000045 unsigned int r = 0;
Neil Booth2a967f32001-05-20 06:26:45 +000046
47 while (n--)
Zack Weinbergc6e83802004-06-05 20:58:06 +000048 r = HT_HASHSTEP (r, *str++);
Neil Booth2a967f32001-05-20 06:26:45 +000049
Zack Weinbergc6e83802004-06-05 20:58:06 +000050 return HT_HASHFINISH (r, len);
Neil Booth2a967f32001-05-20 06:26:45 +000051}
52
53/* Initialize an identifier hashtable. */
54
Diego Novillo0823efe2012-08-14 21:56:07 -040055cpp_hash_table *
Andreas Jaeger1d088de2003-07-06 08:15:36 +020056ht_create (unsigned int order)
Neil Booth2a967f32001-05-20 06:26:45 +000057{
58 unsigned int nslots = 1 << order;
Diego Novillo0823efe2012-08-14 21:56:07 -040059 cpp_hash_table *table;
Neil Booth2a967f32001-05-20 06:26:45 +000060
Diego Novillo0823efe2012-08-14 21:56:07 -040061 table = XCNEW (cpp_hash_table);
Neil Booth2a967f32001-05-20 06:26:45 +000062
63 /* Strings need no alignment. */
Alan Modra19a9ba62014-10-22 12:11:31 +103064 obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
Zack Weinberg43839642003-07-13 17:34:18 +000065
Neil Booth2a967f32001-05-20 06:26:45 +000066 obstack_alignment_mask (&table->stack) = 0;
67
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +000068 table->entries = XCNEWVEC (hashnode, nslots);
Geoffrey Keatingb453c952004-05-30 00:49:06 +000069 table->entries_owned = true;
Neil Booth2a967f32001-05-20 06:26:45 +000070 table->nslots = nslots;
71 return table;
72}
73
Neil Boothbef985f2001-08-11 12:37:19 +000074/* Frees all memory associated with a hash table. */
75
76void
Diego Novillo0823efe2012-08-14 21:56:07 -040077ht_destroy (cpp_hash_table *table)
Neil Boothbef985f2001-08-11 12:37:19 +000078{
79 obstack_free (&table->stack, NULL);
Geoffrey Keatingb453c952004-05-30 00:49:06 +000080 if (table->entries_owned)
81 free (table->entries);
Neil Boothbef985f2001-08-11 12:37:19 +000082 free (table);
83}
84
Neil Booth2a967f32001-05-20 06:26:45 +000085/* Returns the hash entry for the a STR of length LEN. If that string
Tom Tromeydae41742008-05-21 15:00:59 +000086 already exists in the table, returns the existing entry. If the
Neil Booth2a967f32001-05-20 06:26:45 +000087 identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88 returns NULL. Otherwise insert and returns a new entry. A new
Tom Tromeydae41742008-05-21 15:00:59 +000089 string is allocated. */
Neil Booth2a967f32001-05-20 06:26:45 +000090hashnode
Diego Novillo0823efe2012-08-14 21:56:07 -040091ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
Andreas Jaeger1d088de2003-07-06 08:15:36 +020092 enum ht_lookup_option insert)
Neil Booth2a967f32001-05-20 06:26:45 +000093{
Zack Weinbergc6e83802004-06-05 20:58:06 +000094 return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95 insert);
96}
97
98hashnode
Diego Novillo0823efe2012-08-14 21:56:07 -040099ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
Zack Weinbergc6e83802004-06-05 20:58:06 +0000100 size_t len, unsigned int hash,
101 enum ht_lookup_option insert)
102{
Neil Booth2a967f32001-05-20 06:26:45 +0000103 unsigned int hash2;
104 unsigned int index;
Tom Tromeydae41742008-05-21 15:00:59 +0000105 unsigned int deleted_index = table->nslots;
Neil Booth2a967f32001-05-20 06:26:45 +0000106 size_t sizemask;
107 hashnode node;
108
109 sizemask = table->nslots - 1;
110 index = hash & sizemask;
Neil Booth2a967f32001-05-20 06:26:45 +0000111 table->searches++;
112
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000113 node = table->entries[index];
Tom Tromeydae41742008-05-21 15:00:59 +0000114
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000115 if (node != NULL)
Neil Booth2a967f32001-05-20 06:26:45 +0000116 {
Tom Tromeydae41742008-05-21 15:00:59 +0000117 if (node == DELETED)
118 deleted_index = index;
119 else if (node->hash_value == hash
120 && HT_LEN (node) == (unsigned int) len
121 && !memcmp (HT_STR (node), str, len))
122 return node;
Neil Booth2a967f32001-05-20 06:26:45 +0000123
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000124 /* hash2 must be odd, so we're guaranteed to visit every possible
125 location in the table during rehashing. */
126 hash2 = ((hash * 17) & sizemask) | 1;
127
128 for (;;)
129 {
130 table->collisions++;
131 index = (index + hash2) & sizemask;
132 node = table->entries[index];
133 if (node == NULL)
134 break;
135
Tom Tromeydae41742008-05-21 15:00:59 +0000136 if (node == DELETED)
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000137 {
Tom Tromeydae41742008-05-21 15:00:59 +0000138 if (deleted_index != table->nslots)
139 deleted_index = index;
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000140 }
Tom Tromeydae41742008-05-21 15:00:59 +0000141 else if (node->hash_value == hash
142 && HT_LEN (node) == (unsigned int) len
143 && !memcmp (HT_STR (node), str, len))
144 return node;
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000145 }
Neil Booth2a967f32001-05-20 06:26:45 +0000146 }
147
148 if (insert == HT_NO_INSERT)
149 return NULL;
150
Tom Tromeydae41742008-05-21 15:00:59 +0000151 /* We prefer to overwrite the first deleted slot we saw. */
152 if (deleted_index != table->nslots)
153 index = deleted_index;
154
Neil Booth2a967f32001-05-20 06:26:45 +0000155 node = (*table->alloc_node) (table);
156 table->entries[index] = node;
157
Roger Sayle7bb3fbb2003-08-08 20:23:06 +0000158 HT_LEN (node) = (unsigned int) len;
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000159 node->hash_value = hash;
Tom Tromeydae41742008-05-21 15:00:59 +0000160
161 if (table->alloc_subobject)
162 {
Jerry Quinnf1bf4102009-07-18 03:22:16 +0000163 char *chars = (char *) table->alloc_subobject (len + 1);
Tom Tromeydae41742008-05-21 15:00:59 +0000164 memcpy (chars, str, len);
165 chars[len] = '\0';
166 HT_STR (node) = (const unsigned char *) chars;
167 }
Neil Booth2a967f32001-05-20 06:26:45 +0000168 else
Tom Tromeydae41742008-05-21 15:00:59 +0000169 HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170 str, len);
Neil Booth2a967f32001-05-20 06:26:45 +0000171
172 if (++table->nelements * 4 >= table->nslots * 3)
173 /* Must expand the string table. */
174 ht_expand (table);
175
176 return node;
177}
178
179/* Double the size of a hash table, re-hashing existing entries. */
180
181static void
Diego Novillo0823efe2012-08-14 21:56:07 -0400182ht_expand (cpp_hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000183{
184 hashnode *nentries, *p, *limit;
185 unsigned int size, sizemask;
186
187 size = table->nslots * 2;
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +0000188 nentries = XCNEWVEC (hashnode, size);
Neil Booth2a967f32001-05-20 06:26:45 +0000189 sizemask = size - 1;
190
191 p = table->entries;
192 limit = p + table->nslots;
193 do
Tom Tromeydae41742008-05-21 15:00:59 +0000194 if (*p && *p != DELETED)
Neil Booth2a967f32001-05-20 06:26:45 +0000195 {
196 unsigned int index, hash, hash2;
197
Gabriel Dos Reis5e0c54e2003-05-18 13:40:54 +0000198 hash = (*p)->hash_value;
Neil Booth2a967f32001-05-20 06:26:45 +0000199 index = hash & sizemask;
200
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000201 if (nentries[index])
Neil Booth2a967f32001-05-20 06:26:45 +0000202 {
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000203 hash2 = ((hash * 17) & sizemask) | 1;
204 do
Neil Booth2a967f32001-05-20 06:26:45 +0000205 {
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000206 index = (index + hash2) & sizemask;
Neil Booth2a967f32001-05-20 06:26:45 +0000207 }
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000208 while (nentries[index]);
Neil Booth2a967f32001-05-20 06:26:45 +0000209 }
Roger Sayle4ae2e3e2003-08-22 22:29:17 +0000210 nentries[index] = *p;
Neil Booth2a967f32001-05-20 06:26:45 +0000211 }
212 while (++p < limit);
213
Geoffrey Keatingb453c952004-05-30 00:49:06 +0000214 if (table->entries_owned)
215 free (table->entries);
216 table->entries_owned = true;
Neil Booth2a967f32001-05-20 06:26:45 +0000217 table->entries = nentries;
218 table->nslots = size;
219}
220
221/* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222 the node, and V. */
223void
Diego Novillo0823efe2012-08-14 21:56:07 -0400224ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
Neil Booth2a967f32001-05-20 06:26:45 +0000225{
226 hashnode *p, *limit;
227
228 p = table->entries;
229 limit = p + table->nslots;
230 do
Tom Tromeydae41742008-05-21 15:00:59 +0000231 if (*p && *p != DELETED)
Neil Booth2a967f32001-05-20 06:26:45 +0000232 {
233 if ((*cb) (table->pfile, *p, v) == 0)
234 break;
235 }
236 while (++p < limit);
237}
238
Tom Tromeydae41742008-05-21 15:00:59 +0000239/* Like ht_forall, but a nonzero return from the callback means that
240 the entry should be removed from the table. */
241void
Diego Novillo0823efe2012-08-14 21:56:07 -0400242ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
Tom Tromeydae41742008-05-21 15:00:59 +0000243{
244 hashnode *p, *limit;
245
246 p = table->entries;
247 limit = p + table->nslots;
248 do
249 if (*p && *p != DELETED)
250 {
251 if ((*cb) (table->pfile, *p, v))
252 *p = DELETED;
253 }
254 while (++p < limit);
255}
256
Geoffrey Keatingb453c952004-05-30 00:49:06 +0000257/* Restore the hash table. */
258void
Diego Novillo0823efe2012-08-14 21:56:07 -0400259ht_load (cpp_hash_table *ht, hashnode *entries,
Geoffrey Keatingb453c952004-05-30 00:49:06 +0000260 unsigned int nslots, unsigned int nelements,
261 bool own)
262{
263 if (ht->entries_owned)
264 free (ht->entries);
265 ht->entries = entries;
266 ht->nslots = nslots;
267 ht->nelements = nelements;
268 ht->entries_owned = own;
269}
270
Neil Booth2a967f32001-05-20 06:26:45 +0000271/* Dump allocation statistics to stderr. */
272
273void
Diego Novillo0823efe2012-08-14 21:56:07 -0400274ht_dump_statistics (cpp_hash_table *table)
Neil Booth2a967f32001-05-20 06:26:45 +0000275{
276 size_t nelts, nids, overhead, headers;
Tom Tromeydae41742008-05-21 15:00:59 +0000277 size_t total_bytes, longest, deleted = 0;
Serge Belyshev0fd9e8d2004-09-06 13:22:48 +0000278 double sum_of_squares, exp_len, exp_len2, exp2_len;
Neil Booth2a967f32001-05-20 06:26:45 +0000279 hashnode *p, *limit;
280
281#define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282 ? (x) \
283 : ((x) < 1024*1024*10 \
284 ? (x) / 1024 \
285 : (x) / (1024*1024))))
286#define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287
288 total_bytes = longest = sum_of_squares = nids = 0;
289 p = table->entries;
290 limit = p + table->nslots;
291 do
Tom Tromeydae41742008-05-21 15:00:59 +0000292 if (*p == DELETED)
293 ++deleted;
294 else if (*p)
Neil Booth2a967f32001-05-20 06:26:45 +0000295 {
296 size_t n = HT_LEN (*p);
297
298 total_bytes += n;
Serge Belyshev0fd9e8d2004-09-06 13:22:48 +0000299 sum_of_squares += (double) n * n;
Neil Booth2a967f32001-05-20 06:26:45 +0000300 if (n > longest)
301 longest = n;
302 nids++;
303 }
304 while (++p < limit);
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200305
Neil Booth2a967f32001-05-20 06:26:45 +0000306 nelts = table->nelements;
Neil Booth2a967f32001-05-20 06:26:45 +0000307 headers = table->nslots * sizeof (hashnode);
308
Martin Liska60448172019-02-26 18:27:52 +0100309 fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:",
Neil Booth2a967f32001-05-20 06:26:45 +0000310 (unsigned long) nelts);
Martin Liska60448172019-02-26 18:27:52 +0100311 fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:",
Neil Booth2a967f32001-05-20 06:26:45 +0000312 (unsigned long) nids, nids * 100.0 / nelts);
Martin Liska60448172019-02-26 18:27:52 +0100313 fprintf (stderr, "%-32s%lu\n", "slots:",
Neil Booth2a967f32001-05-20 06:26:45 +0000314 (unsigned long) table->nslots);
Martin Liska60448172019-02-26 18:27:52 +0100315 fprintf (stderr, "%-32s%lu\n", "deleted:",
Tom Tromeydae41742008-05-21 15:00:59 +0000316 (unsigned long) deleted);
Martin Liska46aeb072018-11-05 14:35:09 +0100317
318 if (table->alloc_subobject)
Martin Liska60448172019-02-26 18:27:52 +0100319 fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:",
Martin Liska46aeb072018-11-05 14:35:09 +0100320 SCALE (total_bytes), LABEL (total_bytes));
321 else
322 {
323 overhead = obstack_memory_used (&table->stack) - total_bytes;
Martin Liska60448172019-02-26 18:27:52 +0100324 fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n",
325 "obstack bytes:",
Martin Liska037903c2018-11-05 15:25:37 +0100326 SCALE (total_bytes), LABEL (total_bytes),
327 SCALE (overhead), LABEL (overhead));
Martin Liska46aeb072018-11-05 14:35:09 +0100328 }
Martin Liska60448172019-02-26 18:27:52 +0100329 fprintf (stderr, "%-32s%lu%c\n", "table size:",
Neil Booth2a967f32001-05-20 06:26:45 +0000330 SCALE (headers), LABEL (headers));
331
332 exp_len = (double)total_bytes / (double)nelts;
333 exp2_len = exp_len * exp_len;
334 exp_len2 = (double) sum_of_squares / (double) nelts;
335
Martin Liska60448172019-02-26 18:27:52 +0100336 fprintf (stderr, "%-32s%.4f\n", "coll/search:",
Neil Booth2a967f32001-05-20 06:26:45 +0000337 (double) table->collisions / (double) table->searches);
Martin Liska60448172019-02-26 18:27:52 +0100338 fprintf (stderr, "%-32s%.4f\n", "ins/search:",
Neil Booth2a967f32001-05-20 06:26:45 +0000339 (double) nelts / (double) table->searches);
Martin Liska60448172019-02-26 18:27:52 +0100340 fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n",
341 "avg. entry:",
Neil Booth2a967f32001-05-20 06:26:45 +0000342 exp_len, approx_sqrt (exp_len2 - exp2_len));
Martin Liska60448172019-02-26 18:27:52 +0100343 fprintf (stderr, "%-32s%lu\n", "longest entry:",
Neil Booth2a967f32001-05-20 06:26:45 +0000344 (unsigned long) longest);
345#undef SCALE
346#undef LABEL
347}
348
349/* Return the approximate positive square root of a number N. This is for
350 statistical reports, not code generation. */
Zack Weinberga2f7be92003-07-22 16:24:53 +0000351static double
Andreas Jaeger1d088de2003-07-06 08:15:36 +0200352approx_sqrt (double x)
Neil Booth2a967f32001-05-20 06:26:45 +0000353{
354 double s, d;
355
356 if (x < 0)
357 abort ();
358 if (x == 0)
359 return 0;
360
361 s = x;
362 do
363 {
364 d = (s * s - x) / (2 * s);
365 s -= d;
366 }
367 while (d > .0001);
368 return s;
369}