py/qstr: Separate hash and len from string data.

This allows the compiler to merge strings: e.g. "update",
"difference_update" and "symmetric_difference_update" will all point to the
same memory.

No functional change.

The size reduction depends on the number of qstrs in the build.  The change
this commit brings is:

   bare-arm:    -4 -0.007%
minimal x86:  +150 +0.092% [incl +48(data)]
   unix x64:  -608 -0.118%
unix nanbox:  -572 -0.126% [incl +32(data)]
      stm32: -1392 -0.352% PYBV10
     cc3200:  -448 -0.244%
    esp8266: -1208 -0.173% GENERIC
      esp32: -1028 -0.068% GENERIC[incl -1020(data)]
        nrf:  -440 -0.252% pca10040
        rp2: -1072 -0.217% PICO
       samd:  -368 -0.264% ADAFRUIT_ITSYBITSY_M4_EXPRESS

Performance is also improved (on bare metal at least) for the
core_import_mpy_multi.py, core_import_mpy_single.py and core_qstr.py
performance benchmarks.

Originally at adafruit#4583

Signed-off-by: Artyom Skrobov <tyomitch@gmail.com>
diff --git a/py/qstr.c b/py/qstr.c
index e5b13b7..848a583 100644
--- a/py/qstr.c
+++ b/py/qstr.c
@@ -35,7 +35,6 @@
 
 // NOTE: we are using linear arrays to store and search for qstr's (unique strings, interned strings)
 // ultimately we will replace this with a static hash table of some kind
-// also probably need to include the length in the string data, to allow null bytes in the string
 
 #if MICROPY_DEBUG_VERBOSE // print debugging info
 #define DEBUG_printf DEBUG_printf
@@ -44,34 +43,9 @@
 #endif
 
 // A qstr is an index into the qstr pool.
-// The data for a qstr contains (hash, length, data):
-//  - hash (configurable number of bytes)
-//  - length (configurable number of bytes)
-//  - data ("length" number of bytes)
-//  - \0 terminated (so they can be printed using printf)
+// The data for a qstr is \0 terminated (so they can be printed using printf)
 
-#if MICROPY_QSTR_BYTES_IN_HASH == 1
-    #define Q_HASH_MASK (0xff)
-    #define Q_GET_HASH(q) ((mp_uint_t)(q)[0])
-    #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); } while (0)
-#elif MICROPY_QSTR_BYTES_IN_HASH == 2
-    #define Q_HASH_MASK (0xffff)
-    #define Q_GET_HASH(q) ((mp_uint_t)(q)[0] | ((mp_uint_t)(q)[1] << 8))
-    #define Q_SET_HASH(q, hash) do { (q)[0] = (hash); (q)[1] = (hash) >> 8; } while (0)
-#else
-    #error unimplemented qstr hash decoding
-#endif
-#define Q_GET_ALLOC(q)  (MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + Q_GET_LENGTH(q) + 1)
-#define Q_GET_DATA(q)   ((q) + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN)
-#if MICROPY_QSTR_BYTES_IN_LEN == 1
-    #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH])
-    #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); } while (0)
-#elif MICROPY_QSTR_BYTES_IN_LEN == 2
-    #define Q_GET_LENGTH(q) ((q)[MICROPY_QSTR_BYTES_IN_HASH] | ((q)[MICROPY_QSTR_BYTES_IN_HASH + 1] << 8))
-    #define Q_SET_LENGTH(q, len) do { (q)[MICROPY_QSTR_BYTES_IN_HASH] = (len); (q)[MICROPY_QSTR_BYTES_IN_HASH + 1] = (len) >> 8; } while (0)
-#else
-    #error unimplemented qstr length decoding
-#endif
+#define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
 
 #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
 #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
@@ -100,14 +74,32 @@
     return hash;
 }
 
+const qstr_hash_t mp_qstr_const_hashes[] = {
+    #ifndef NO_QSTR
+#define QDEF(id, hash, len, str) hash,
+    #include "genhdr/qstrdefs.generated.h"
+#undef QDEF
+    #endif
+};
+
+const qstr_len_t mp_qstr_const_lengths[] = {
+    #ifndef NO_QSTR
+#define QDEF(id, hash, len, str) len,
+    #include "genhdr/qstrdefs.generated.h"
+#undef QDEF
+    #endif
+};
+
 const qstr_pool_t mp_qstr_const_pool = {
     NULL,               // no previous pool
     0,                  // no previous pool
     MICROPY_ALLOC_QSTR_ENTRIES_INIT,
     MP_QSTRnumber_of,   // corresponds to number of strings in array just below
+    (qstr_hash_t *)mp_qstr_const_hashes,
+    (qstr_len_t *)mp_qstr_const_lengths,
     {
         #ifndef NO_QSTR
-#define QDEF(id, str) str,
+#define QDEF(id, hash, len, str) str,
         #include "genhdr/qstrdefs.generated.h"
 #undef QDEF
         #endif
@@ -130,19 +122,21 @@
     #endif
 }
 
-STATIC const byte *find_qstr(qstr q) {
+STATIC qstr_pool_t *find_qstr(qstr *q) {
     // search pool for this qstr
     // total_prev_len==0 in the final pool, so the loop will always terminate
     qstr_pool_t *pool = MP_STATE_VM(last_pool);
-    while (q < pool->total_prev_len) {
+    while (*q < pool->total_prev_len) {
         pool = pool->prev;
     }
-    return pool->qstrs[q - pool->total_prev_len];
+    *q -= pool->total_prev_len;
+    assert(*q < pool->len);
+    return pool;
 }
 
 // qstr_mutex must be taken while in this function
-STATIC qstr qstr_add(const byte *q_ptr) {
-    DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", Q_GET_HASH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_LENGTH(q_ptr), Q_GET_DATA(q_ptr));
+STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
+    DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
 
     // make sure we have room in the pool for a new qstr
     if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
@@ -151,7 +145,9 @@
         // Put a lower bound on the allocation size in case the extra qstr pool has few entries
         new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
         #endif
-        qstr_pool_t *pool = m_new_obj_var_maybe(qstr_pool_t, const char *, new_alloc);
+        mp_uint_t pool_size = sizeof(qstr_pool_t)
+            + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc;
+        qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
         if (pool == NULL) {
             // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
             // at garbage collection since it's reachable from a qstr_pool_t.  And the caller of
@@ -162,6 +158,8 @@
             QSTR_EXIT();
             m_malloc_fail(new_alloc);
         }
+        pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
+        pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
         pool->prev = MP_STATE_VM(last_pool);
         pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
         pool->alloc = new_alloc;
@@ -171,10 +169,14 @@
     }
 
     // add the new qstr
-    MP_STATE_VM(last_pool)->qstrs[MP_STATE_VM(last_pool)->len++] = q_ptr;
+    mp_uint_t at = MP_STATE_VM(last_pool)->len;
+    MP_STATE_VM(last_pool)->hashes[at] = hash;
+    MP_STATE_VM(last_pool)->lengths[at] = len;
+    MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
+    MP_STATE_VM(last_pool)->len++;
 
     // return id for the newly-added qstr
-    return MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len - 1;
+    return MP_STATE_VM(last_pool)->total_prev_len + at;
 }
 
 qstr qstr_find_strn(const char *str, size_t str_len) {
@@ -183,9 +185,10 @@
 
     // search pools for the data
     for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
-        for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
-            if (Q_GET_HASH(*q) == str_hash && Q_GET_LENGTH(*q) == str_len && memcmp(Q_GET_DATA(*q), str, str_len) == 0) {
-                return pool->total_prev_len + (q - pool->qstrs);
+        for (mp_uint_t at = 0, top = pool->len; at < top; at++) {
+            if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
+                && memcmp(pool->qstrs[at], str, str_len) == 0) {
+                return pool->total_prev_len + at;
             }
         }
     }
@@ -211,14 +214,14 @@
         }
 
         // compute number of bytes needed to intern this string
-        size_t n_bytes = MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len + 1;
+        size_t n_bytes = len + 1;
 
         if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
             // not enough room at end of previously interned string so try to grow
-            byte *new_p = m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
+            char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
             if (new_p == NULL) {
                 // could not grow existing memory; shrink it to fit previous
-                (void)m_renew_maybe(byte, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
+                (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
                 MP_STATE_VM(qstr_last_chunk) = NULL;
             } else {
                 // could grow existing memory
@@ -232,10 +235,10 @@
             if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
                 al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
             }
-            MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, al);
+            MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, al);
             if (MP_STATE_VM(qstr_last_chunk) == NULL) {
                 // failed to allocate a large chunk so try with exact size
-                MP_STATE_VM(qstr_last_chunk) = m_new_maybe(byte, n_bytes);
+                MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, n_bytes);
                 if (MP_STATE_VM(qstr_last_chunk) == NULL) {
                     QSTR_EXIT();
                     m_malloc_fail(n_bytes);
@@ -247,40 +250,38 @@
         }
 
         // allocate memory from the chunk for this new interned string's data
-        byte *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
+        char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
         MP_STATE_VM(qstr_last_used) += n_bytes;
 
         // store the interned strings' data
         mp_uint_t hash = qstr_compute_hash((const byte *)str, len);
-        Q_SET_HASH(q_ptr, hash);
-        Q_SET_LENGTH(q_ptr, len);
-        memcpy(q_ptr + MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN, str, len);
-        q_ptr[MICROPY_QSTR_BYTES_IN_HASH + MICROPY_QSTR_BYTES_IN_LEN + len] = '\0';
-        q = qstr_add(q_ptr);
+        memcpy(q_ptr, str, len);
+        q_ptr[len] = '\0';
+        q = qstr_add(hash, len, q_ptr);
     }
     QSTR_EXIT();
     return q;
 }
 
 mp_uint_t qstr_hash(qstr q) {
-    const byte *qd = find_qstr(q);
-    return Q_GET_HASH(qd);
+    qstr_pool_t *pool = find_qstr(&q);
+    return pool->hashes[q];
 }
 
 size_t qstr_len(qstr q) {
-    const byte *qd = find_qstr(q);
-    return Q_GET_LENGTH(qd);
+    qstr_pool_t *pool = find_qstr(&q);
+    return pool->lengths[q];
 }
 
 const char *qstr_str(qstr q) {
-    const byte *qd = find_qstr(q);
-    return (const char *)Q_GET_DATA(qd);
+    qstr_pool_t *pool = find_qstr(&q);
+    return pool->qstrs[q];
 }
 
 const byte *qstr_data(qstr q, size_t *len) {
-    const byte *qd = find_qstr(q);
-    *len = Q_GET_LENGTH(qd);
-    return Q_GET_DATA(qd);
+    qstr_pool_t *pool = find_qstr(&q);
+    *len = pool->lengths[q];
+    return (byte *)pool->qstrs[q];
 }
 
 void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) {
@@ -292,13 +293,14 @@
     for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
         *n_pool += 1;
         *n_qstr += pool->len;
-        for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
-            *n_str_data_bytes += Q_GET_ALLOC(*q);
+        for (qstr_len_t *l = pool->lengths, *l_top = pool->lengths + pool->len; l < l_top; l++) {
+            *n_str_data_bytes += *l + 1;
         }
         #if MICROPY_ENABLE_GC
         *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
         #else
-        *n_total_bytes += sizeof(qstr_pool_t) + sizeof(qstr) * pool->alloc;
+        *n_total_bytes += sizeof(qstr_pool_t)
+            + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc;
         #endif
     }
     *n_total_bytes += *n_str_data_bytes;
@@ -309,8 +311,8 @@
 void qstr_dump_data(void) {
     QSTR_ENTER();
     for (qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
-        for (const byte **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
-            mp_printf(&mp_plat_print, "Q(%s)\n", Q_GET_DATA(*q));
+        for (const char **q = pool->qstrs, **q_top = pool->qstrs + pool->len; q < q_top; q++) {
+            mp_printf(&mp_plat_print, "Q(%s)\n", *q);
         }
     }
     QSTR_EXIT();